##############################################################################
##############################################################################
##############################################################################
##############################################################################
## Replication code for:
## Lyall and Zhukov, "Fratricidal Coercion in Modern War"
## ***
## WARNING: Execute run1_setup.R before running this file !!!
## ***
##############################################################################
##############################################################################
##############################################################################
##############################################################################

# Print upon starting
print("**** Starting run2_main.R ****")




##############################################################################
##############################################################################
##############################################################################
## Statistical Analysis of NKVD Presence and Soviet Performance
##
## Figure 1: How did NKVD Presence Impact Soviet Battlefield Performance?
## Table A1.1: Distribution of NKVD Personnel by Front
## Table A1.2: Predictors of NKVD Presence.
## Table A2.3: Coefficient Estimates for Three-Way Fixed Effects Models
## Table A2.4: Coefficient Estimates for Three-Way Random Effects Models
## Figure A3.1: Distribution of Placebo Effects across 10,000 Simulations
## Figure A3.2: Time-variant coefficient estimates, fixed effects.
## Figure A3.3: Time-variant coefficient estimates, random effects.
## Figure A3.4: Subsample Analyses: First Months of Deployment.
## Figure A3.5: Sensitivity Analysis of Variable Division Strength.
## Table A3.5: Coefficient Estimates for Peer Effects Model (within-army peer effects).
## Table A3.6: Coefficient Estimates for Peer Effects Model (within-front peer effects).
## Figure A3.6: Did Officers Respond Differently to Fratricidal Coercion?
## Table A3.7: Coefficient Estimates with “First Month” Dummies.
## Table A3.8: Coefficient Estimates with “Deployment Duration” Cubic Spline.
##############################################################################
##############################################################################
##############################################################################

print("Running: code chunk 1")
# Clear workspace, source functions
rm(list=ls())
source("code/functions.R")

##################################
# Load and prepare data
##################################

# Load and preprocess data
X <- readRDS("data/data_divisionlevel.RDS") %>% preprocez()

# Subset to division-level only
X_s <- X %>% .[ECH_DIV==1,] %>% .[!duplicated(paste(UNIT_UA,bxid,TID,sep="_")),]

##################################
# Specify and estimate models
##################################

# 3-way FE and RE models
model_fe <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | DIV_ARMY + TID + bxid )
model_re <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|DIV_ARMY)+(1|TID)+(1|bxid))

# Loop over DV's
mermods <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re,model_fe=model_fe)

# Check Hausman stats
mermods[hausman_p<.05&grepl("FE",model_name),]
mermods[hausman_p>=.05&grepl("RE",model_name),]

##################################
# Make Figure 1
##################################

# CI plot FE (simplified)
remods_ <- mermods %>% .[grepl("FE pop",model_name),] %>% .[,outcome_name := factor(outcome_name, levels = rev(yvarz))] %>% mutate_at(names(.) %>% grep("^icc",.,value=T),as.numeric) %>% setDT() %>% .[,outcome_label := outcome_name %>% match(yvarz) %>% ylabz[.] %>% factor(., levels = rev(ylabz))] %>% .[!outcome_name%in%c("OK_100","DEF_100","TRE_100","GMED_100")]
p <- ggplot2::ggplot(remods_,  ggplot2::aes()) +
    ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
    ggplot2::geom_pointrange(ggplot2::aes(x = outcome_label, y = b, ymin = l, ymax = u), lwd = 1/1.618/2, position = ggplot2::position_dodge(width = 1/2), shape = 16, fatten=1.618, fill = "WHITE")  + 
    ggplot2::theme_bw() + ggplot2::theme(aspect.ratio=1/1.618, axis.title.x=ggplot2::element_blank(), axis.title.y = ggplot2::element_blank(),title=ggplot2::element_blank()) + ggplot2::coord_flip() + ggplot2::xlab("") + ggplot2::ylab("")

# Save to png
w_ <- 3.25*1.618; h_ <- 2.9
png("results/fig1.png", width = w_, height = h_, units = "in",res=300)
p
dev.off()


##################################
# Make Table A1.1
##################################

# Load front-level reference table
frtx_ref <- readRDS("data/fronts_ref.RDS")

# Aggregate by front
X_byfront <- X[FRONT %in% frtx_ref[CANONICAL==1,FRONT_ABB],sum(NKVD_OO),by=FRONT] %>% data.table::setnames("V1","NKVD") %>% merge(frtx_ref[CANONICAL==1],by.x="FRONT",by.y="FRONT_ABB",all.x=TRUE,all.y=FALSE) %>% .[,.(FRONT_EN,NKVD,Start,End,Battles)] %>% na.omit() %>% .[order(NKVD)] %>% .[,NKVD:=NKVD%>% format(big.mark = ",")] %>% data.table::setnames(c("Front","NKVD Personnel","Start","End","N. Battles"))

# Export to LaTeX
note <- paste0("NOTE: The table lists the cumulative number of NKVD personnel assigned to each Front of the Red Army over the course of the war. Start and end dates correspond to first and last days of active operations. Numbers of battles represent cumulative engagements involving division-level units subordinate to each Front.\n}")
xmat <- xtable::xtable(X_byfront,caption = "{\\bf Distribution of NKVD Personnel by Front\\\\}",label="tab:nkvd_front",align=paste0("lp{2.5cm}p{3.5cm}p{1.75cm}p{1.75cm}p{1.75cm}")) %>% data.table::setnames(paste0("\\multicolumn{1}{c}{",names(.),"}")) 
for(r0 in c(1:nrow(xmat))){xmat[r0,] <- paste0("\\multicolumn{1}{c}{",xmat[r0,],"}")}
xmat %>%  xtable::print.xtable(.,file=tempfile(),hline.after=NULL,sanitize.colnames.function = identity,sanitize.text.function=identity,caption.placement="top",size="\\small",include.rownames=FALSE,add.to.row=list(pos=list(-1,0, nrow(X_byfront)),command=c("\\toprule ","\\midrule ", "\\bottomrule "))) %>% gsub("\\end{tabular}\n", paste0("\n\\multicolumn{5}{p{5in}}{{\\footnotesize ", note, "}\\\\ \\end{tabular}"), ., fixed = TRUE) %>% cat(., sep = "\n", file =  "results/tabA1_1.tex")


############################
# Make Table A1.2
############################

# Estimate NKVD Assignment Model
mod_assign <- lfe::felm(log(NKVD_OO+1) ~ TID + UNIT_TYPE + RUS_100 + soldiers_dist + AGE_1941 + URBAN_PCT + POP_DENSITY + log(M_10km_ADM3+1) | ARMY, data=X_s, weights = X_s$N_SOLDIERS); summary(mod_assign)

# Regression table
betas <- summary(mod_assign)["coefficients"][[1]]%>%data.table::as.data.table()%>%.[,rn:=row.names(summary(mod_assign)["coefficients"][[1]])]
labz <- data.table::data.table(rn=c("TID","UNIT_TYPEART_AAD","UNIT_TYPEAVIATION","UNIT_TYPEENGINEER","UNIT_TYPEINF_RFL_ABN","RUS_100","soldiers_dist","AGE_1941","URBAN_PCT","POP_DENSITY","log(M_10km_ADM3 + 1)"),rn_lab=c("Month of war","Artillery/AAD","Aviation","Engineer","Rifle","Pct.Russian","Geo.Diversity","Avg.Age","Urbanization","Pop.Density","Pre-War Repression"))
capz_fe <- "\\textbf{Predictors of NKVD Presence}. Dependent variable is logged number of OO/SMERSH personnel assigned to each division-month. Observations weighted by number of monthly discharge records. Percentage point change calculated as $(e^{\\hat{\\beta}}-1)\\times 100$."; lbz_fe <- "tab:reg_assign"
fnz <- "tabA1_2.tex"
options(scipen=999)
regtab <- data.table::data.table(Variable=c(as.character(unlist(do.call(rbind, apply(data.frame(betas[,rn]), 1, function(x) {rbind(x, data.frame(""))})))),"AIC","Army FE","N")) %>% .[Variable %in% labz[,rn],Variable := Variable %>% match(labz[,rn]) %>% labz[.,rn_lab]] %>% .[,`Coefficient (95\\% CI)`:=c(as.character(unlist(do.call(rbind, apply(data.frame(betas[,SUNGEO::smart_round(Estimate,2)]), 1, function(x) {rbind(x, data.frame(""))})))),stats::AIC(mod_assign)%>%SUNGEO::smart_round(0),as.integer(mod_assign$fe %>% sapply(function(x){length(levels(x))})),as.integer(mod_assign["N"]))]%>%.[`Coefficient (95\\% CI)`%in%"",`Coefficient (95\\% CI)`:=betas[,paste0("(",SUNGEO::smart_round(Estimate-1.96*`Std. Error`,1),",",SUNGEO::smart_round(Estimate+1.96*`Std. Error`,1),")")]]%>% .[,`Pct.Change`:= c(as.character(unlist(do.call(rbind, apply(data.frame(SUNGEO::smart_round((summary(mod_assign)["coefficients"][[1]][,"Estimate"]%>%exp()-1)*100,1)), 1, function(x) {rbind(x, data.frame(""))})))),"","","")] 
regtab %>% xtable::xtable(caption=capz_fe,label=lbz_fe,align=paste0("lr",paste0(rep("c",ncol(regtab)-1),collapse=""))) %>% xtable::print.xtable(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(-1,0,nrow(betas)*2,nrow(betas)*2+3),sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=paste0("results/",fnz),caption.placement="top",sanitize.text.function=identity)



##################################
# Make Tables A2.3 & A2.4
##################################

# Table labels
labz <- data.table(rn=c("b","ci","reml","icc_DIV_ARMY","icc_bxid","icc_TID","icc_Residual","hausman_p","aic","bic","N_DIV_ARMY","N_bxid","N_TID","n"),rn_lab=c("Estimate","95% CI","REML","ICC (unit)","ICC (battle)","ICC (month)","ICC (residual)","Hausman p","AIC","BIC","Unit FE","Battle FE","Month FE","N"))

# Fixed effects (Table A2.3)
fnz <- "tabA2_3.tex"
capz_fe <- "\\textbf{Coefficient Estimates for Three-Way Fixed Effects Models}. Observations weighted by number of discharge records per unit-month. Null hypothesis for Hausman test: random effects model is consistent."; lbz_fe <- "tab:fe_pop"
xlabz <- data.table::data.table(rn=c("AGE_1941","soldiers_dist","RUS_100","POP_DENSITY","URBAN_PCT","M_10km_ADM3"),rn_lab=c("Avg.Age","Geo.Diversity","Pct.Russian","Pop.Density","Urbanization","PrewarRepress"))
# Main effecrs
regtab <- mermods %>% .[grepl("FE pop",model_name),]  %>% dplyr::mutate_at(names(.) %>% grep("^icc",.,value=T),as.numeric) %>% data.table::setDT() %>% .[!outcome_name%in%c("OK_100","DEF_100","TRE_100","GMED_100")] %>% .[,ci := paste0("(",l%>%SUNGEO::smart_round(1),",",u%>%SUNGEO::smart_round(1),")")] %>% .[,hausman_p := hausman_p%>%(function(x){ifelse(x<0.001,"<0.001",SUNGEO::smart_round(x))})] %>% .[,c("b","N_DIV_ARMY","N_bxid","N_TID","n") := lapply(.SD,SUNGEO::smart_round,rnd=3),.SDcols=c("b","N_DIV_ARMY","N_bxid","N_TID","n")]  %>% .[,aic := aic %>% smart_round(1)]  %>% .[,.(outcome_name,b,ci,reml,icc_DIV_ARMY,icc_bxid,icc_TID,icc_Residual,hausman_p,aic,N_DIV_ARMY,N_bxid,N_TID,n)] %>% dplyr::select(tidyselect::where(~ any(!is.na(.)))) %>% dplyr::select(tidyselect::where(~ any(.!="")))%>% .[,outcome_name := outcome_name %>% match(yvarz) %>% ylabz_short[.]] %>% t() %>% data.table::as.data.table(keep.rownames=T) %>% data.table::setnames(.[1,] %>% as.character()) %>% .[-1,] %>% .[,outcome_name := outcome_name %>% match(labz[,rn]) %>% labz[.,rn_lab]]
# Covariates
regtab_x <- lapply(1:nrow(xlabz),function(i0){
  xrow <- mermods %>% .[grepl("FE pop",model_name),] %>% .[,.SD,.SDcols=paste0(c("b_","l_","u_"),xlabz[i0,rn]),by=outcome_name] %>% .[!outcome_name%in%c("OK_100","DEF_100","TRE_100","GMED_100")] %>% .[,ci := paste0("(",get(paste0("l_",xlabz[i0,rn]))%>%SUNGEO::smart_round(1),",",get(paste0("u_",xlabz[i0,rn]))%>%SUNGEO::smart_round(1),")")] %>% data.table::setnames("ci",paste0("ci_",xlabz[i0,rn]))%>%.[,eval(paste0("b_",xlabz[i0,rn])):=get(paste0("b_",xlabz[i0,rn]))%>%SUNGEO::smart_round(1)]%>%dplyr::select(paste0(c("b_","ci_"),xlabz[i0,rn]))
  xrow
})%>%dplyr::bind_cols()%>%.[,.SD,.SDcols=!grepl("Intercept",names(.))]%>%t() %>% as.data.table(keep.rownames=T)%>%data.table::setnames(names(regtab))%>%.[,outcome_name := outcome_name %>% match(xlabz[,paste0("b_",rn)]) %>% xlabz[.,rn_lab]] %>% .[is.na(outcome_name),outcome_name := ""]
list(regtab[1:2],regtab_x,regtab[3:.N])%>%data.table::rbindlist() %>% xtable::xtable(caption=capz_fe,label=lbz_fe,align=paste0("lr",paste0(rep("c",ncol(regtab)-1),collapse=""))) %>% print(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(0,2,2+nrow(regtab_x)),sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=paste0("results/",fnz),size="footnotesize",caption.placement="top")

# Random effects (Table A2.4)
fnz <- "tabA2_4.tex"
xlabz <- data.table::data.table(rn=c("Intercept","AGE_1941","soldiers_dist","RUS_100","POP_DENSITY","URBAN_PCT","M_10km_ADM3"),rn_lab=c("Intercept","Avg.Age","Geo.Diversity","Pct.Russian","Pop.Density","Urbanization","PrewarRepress"))
capz_re <- "\\textbf{Coefficient Estimates for Three-Way Random Effects Models}. Observations weighted by number of discharge records per unit-month. ICC:  intraclass correlation coefficient. Null hypothesis for Hausman test: random effects model is consistent."; lbz_re <- "tab:re_pop"
regtab <- mermods %>% .[grepl("RE pop",model_name),]  %>% dplyr::mutate_at(names(.) %>% grep("^icc",.,value=T),as.numeric) %>% data.table::setDT() %>% .[!outcome_name%in%c("OK_100","DEF_100","TRE_100","GMED_100")] %>% .[,ci := paste0("(",l%>%SUNGEO::smart_round(1),",",u%>%SUNGEO::smart_round(1),")")] %>% .[,hausman_p := hausman_p%>%(function(x){ifelse(x<0.001,"<0.001",SUNGEO::smart_round(x))})] %>% .[,c("b","icc_DIV_ARMY","icc_bxid","icc_TID","icc_Residual","N_DIV_ARMY","N_bxid","N_TID","n") := lapply(.SD,SUNGEO::smart_round,rnd=3),.SDcols=c("b","icc_DIV_ARMY","icc_bxid","icc_TID","icc_Residual","N_DIV_ARMY","N_bxid","N_TID","n")] %>% .[,reml := reml %>% SUNGEO::smart_round(1)] %>% .[,aic := aic %>% SUNGEO::smart_round(1)]  %>% .[,.(outcome_name,b,ci,reml,icc_DIV_ARMY,icc_bxid,icc_TID,icc_Residual,hausman_p,aic,N_DIV_ARMY,N_bxid,N_TID,n)] %>% dplyr::select(tidyselect::where(~ any(!is.na(.)))) %>% dplyr::select(tidyselect::where(~ any(.!="")))%>% .[,outcome_name := outcome_name %>% match(yvarz) %>% ylabz_short[.]] %>% t() %>% data.table::as.data.table(keep.rownames=T) %>% data.table::setnames(.[1,] %>% as.character()) %>% .[-1,] %>% .[,outcome_name := outcome_name %>% match(labz[,rn]) %>% labz[.,rn_lab]]
regtab_x <- lapply(1:nrow(xlabz),function(i0){
  xrow <- mermods %>% .[grepl("RE pop",model_name),] %>% .[,.SD,.SDcols=paste0(c("b_","l_","u_"),xlabz[i0,rn]),by=outcome_name] %>% .[!outcome_name%in%c("OK_100","DEF_100","TRE_100","GMED_100")] %>% .[,ci := paste0("(",get(paste0("l_",xlabz[i0,rn]))%>%SUNGEO::smart_round(1),",",get(paste0("u_",xlabz[i0,rn]))%>%SUNGEO::smart_round(1),")")] %>% data.table::setnames("ci",paste0("ci_",xlabz[i0,rn]))%>%.[,eval(paste0("b_",xlabz[i0,rn])):=get(paste0("b_",xlabz[i0,rn]))%>%SUNGEO::smart_round(1)]%>%dplyr::select(paste0(c("b_","ci_"),xlabz[i0,rn]))
  xrow
})%>%dplyr::bind_cols()%>%.[,.SD,.SDcols=!grepl("Intercept",names(.))]%>%t() %>% as.data.table(keep.rownames=T)%>%data.table::setnames(names(regtab))%>%.[,outcome_name := outcome_name %>% match(xlabz[,paste0("b_",rn)]) %>% xlabz[.,rn_lab]] %>% .[is.na(outcome_name),outcome_name := ""]
list(regtab[1:2],regtab_x,regtab[3:.N])%>%data.table::rbindlist() %>% xtable::xtable(caption=capz_re,label=lbz_re,align=paste0("lr",paste0(rep("c",ncol(regtab)-1),collapse=""))) %>% print(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(0,2,2+nrow(regtab_x)),sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=paste0("results/",fnz),size="footnotesize",caption.placement="top")



###########################
# Make Figure A3.1
###########################

rm(list=ls())
source("code/functions.R")

# Load data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% preprocez() %>% .[ECH_DIV==1,]

# Specify models
model_fe <- Formula::Formula(y ~  log(NKVD_RNDM+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | DIV_ARMY + TID + bxid )
model_re <- as.formula(y ~ log(NKVD_RNDM+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|DIV_ARMY)+(1|TID)+(1|bxid))

# Set up and clear temp directory
m_dir <- tempdir()
system(paste0("rm -r ",m_dir,"/*"))

# Randomization inference
m0 <- 1
y0 <- 1
nsim <- 1000
for(m0 in 1:nsim){
  print(paste0("Randomization inference ",m0,"/",nsim))
  set.seed(m0)

  # New temo directory
  m_all <- m_dir %>% paste0("/",m0)

  # Loop over DV's
  X_s <- X_ %>% .[ECH_DIV==1,] %>% .[,NKVD_RNDM := NKVD_OO[sample(.N)]]
  {
  t1 <- Sys.time()
  mermods <- parallel::mclapply(1:length(yvarz),function(y0){
    mod_fe <- lfe::felm(model_fe, data = X_s[,c("y","weights"):=.(get(yvarz[y0]),N_SOLDIERS)], weights = X_s$weights, exactDOF=T); summary(mod_fe)
    grp_fe <- mod_fe$fe %>% sapply(function(x){length(levels(x))})%>%.[order(names(.))]%>%t()%>%data.table::as.data.table()%>%data.table::setnames(paste0("N_",names(.)))
    betas <- summary(mod_fe)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
    other_betas <- summary(mod_fe)["coefficients"][[1]] %>% .[!grepl("NKVD",row.names(.)),]
    row.names(other_betas) <- row.names(other_betas)%>%gsub("^\\(|\\bscale\\(|\\blog\\(|\\)| \\+ 1","",.)
    other_betas <- lapply(1:nrow(other_betas),function(i0){
        out <- data.table::data.table(b=other_betas[i0,1],l=other_betas[i0,1]-other_betas[i0,2]*1.96,u=other_betas[i0,1]+other_betas[i0,2]*1.96)%>%data.table::setnames(paste0(names(.),"_",row.names(other_betas)[i0]))
    })%>% dplyr::bind_cols()
    m6 <- data.table(outcome_name=yvarz[y0],model_name="FE pop weights",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68,aic=stats::AIC(mod_fe),bic=stats::BIC(mod_fe)) %>% dplyr::bind_cols(other_betas,grp_fe)%>%dplyr::select(names(.)[1:7],names(other_betas),dplyr::everything())
    m6
  },mc.cores=7) %>% dplyr::bind_rows()

  t2 <- Sys.time(); print(t2-t1)
  }

  # Write to tamp directory
  data.table::fwrite(mermods,m_all)
}

# Load results
mermods_ <- lapply(1:nsim,function(f0){data.table::fread(m_dir %>% paste0("/",f0))}) %>% dplyr::bind_rows()

# Compare to base specification
model_fe_ <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | DIV_ARMY + TID + bxid )
model_re_ <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|DIV_ARMY)+(1|TID)+(1|bxid))
mermods <- ensemble_modz(X_s=X_,yvarz=yvarz,model_re=model_re_,model_fe=model_fe_)

# Density plot
w_ <- 2*1.618*.7; h_ <- 2*.9
fnamz <- "figA3_1"
fig_dir <- "results/"
fig_out <- lapply(1:length(yvarz),function(y0){
  mod1 <- mermods_[outcome_name%in%yvarz[y0]&grepl("FE po",model_name),] %>% .[order(b)] %>% .[,sim := .I]
  mod1_base <- mermods[outcome_name%in%yvarz[y0]&grepl("FE po",model_name),]
  p <- ggplot2::ggplot(mod1) +
      ggplot2::geom_density(ggplot2::aes(y=b), alpha=.2, fill="white") + 
      ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
      ggplot2::geom_hline(yintercept = mod1_base[,b], colour = "blue", lty = 1,size=.5) +
      ggplot2::theme_bw() + ggplot2::theme(aspect.ratio=1/1.618, axis.title.x=ggplot2::element_blank(), axis.title.y = ggplot2::element_blank()) + ggplot2::coord_flip() + ggplot2::xlab("Density") + ggplot2::ylab("") + ggplot2::labs(title = paste0(ylabz[y0]))  
  png(paste0(fig_dir,fnamz,"_",sprintf("%02d", y0),".png"), width = w_, height = h_, units = "in", res=300)
  print(p)
  dev.off()
}); rm(fig_out)




###########################
# Make Figures A3.2, A3.3
###########################

rm(list=ls())
source("code/functions.R")

# Load and preprocess data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% preprocez()

# Subset to div-level
X_s <- X_[ECH_DIV==1,] %>% .[!duplicated(paste(UNIT_UA,bxid,TID,sep="_")),]

# Specify time-variant coefficient models
model_re <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|DIV_ARMY)  + (1|bxid)+(1+log(NKVD_OO+1)|YRMO))
model_fe <- Formula::Formula("y ~ YRMO:log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3)| DIV_ARMY + YRMO"%>%as.formula())

# Specify base models
model_fe_base <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | DIV_ARMY + TID + bxid )
model_re_base <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|DIV_ARMY)+(1|TID)+(1|bxid))
mermods <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re_base,model_fe=model_fe_base)

# Axis labels
xlabs <- X_s[,unique(YRMO)%>%sort()]%>%data.table::data.table(YRMO=.)%>%.[,TID:=.I]%>%.[,lab:=zoo::as.yearmon(YRMO, "%Y%m") ]

# New model loop
y0 <- 1
rsmods <- parallel::mclapply(seq_along(yvarz),function(y0){

  # Fit FE model
  suppressMessages({
    mod_fe <- lfe::felm(model_fe, data = X_s[,c("y","weights"):=.(get(yvarz[y0]),N_SOLDIERS)], weights = X_s$weights); 
  })
  # Extract estimates
  a <- mermods[outcome_name%in%yvarz[y0]&grepl("FE pop",model_name),b]
  a_l <- mermods[outcome_name%in%yvarz[y0]&grepl("FE pop",model_name),l]
  a_u <- mermods[outcome_name%in%yvarz[y0]&grepl("FE pop",model_name),u]
  betas <- summary(mod_fe)["coefficients"][[1]] %>% .[grep("NKVD|YRMO",row.names(.)),]
  mean_ <- betas[,1]%>%as.data.frame()
  ci_l <- betas[,1]-1.96*betas[,2]%>%as.data.frame()
  ci_u <- betas[,1]+1.96*betas[,2]%>%as.data.frame()

  # Make Figure A3.2
  w_ <- 8
  png(paste0("results/figA3_2_",letters[y0],".png"),width=w_,height=w_/1.618/2,units="in",res=300)
  par(mar=c(2,2.5,.5,.5))
  plot(x=0,y=0,col=NA,ylim=range(unlist(c(ci_l,ci_u))),xlim=c(0,nrow(mean_)),bty="n",xaxt="n",xlab="",ylab="",yaxt="n")
  axis(2,at=pretty(range(unlist(c(ci_l,ci_u)))),las=2)
  axis(1,at=1:nrow(mean_),labels=NA)
  axis(1,at=xlabs[,which(grepl("01$",YRMO))],labels=xlabs[which(grepl("01$",YRMO)),substr(YRMO,1,4)],lwd.ticks=1.618)
  segments(x0=xlabs[,which(grepl("01$",YRMO))],x1=xlabs[,which(grepl("01$",YRMO))],y0=min(ci_l[,1])-sd(ci_l[,1]),y1=max(ci_u[,1])+sd(ci_u[,1]),lty=3,lwd=1/1.618)
  rect(xleft=0,xright=nrow(mean_)+1,ybottom=a_l,ytop=a_u,border=NA,col=rgb(.75,.75,.75,.25))
  segments(x0=0,x1=nrow(mean_)+1,y0=a,y1=a,col="grey",lwd=1.618)
  abline(h=0,lty=2)
  segments(x0=1:nrow(mean_)+.5,x1=1:nrow(mean_)+.5,y0=ci_l[,1],y1=ci_u[,1])
  points(x=1:nrow(mean_)+.5,y=mean_[,1],pch=16)
  dev.off()

  # Fit random slopes model
  suppressMessages({
    mod_xc <- lme4::lmer(model_re, data=X_s[,c("y","weights"):=.(get(yvarz[y0]),N_SOLDIERS)],weights=X_s$weights, control = lme4::lmerControl(optimizer ='optimx', optCtrl=list(method='nlminb')), REML = FALSE); summary(mod_xc)
  })
  # Extract estimates
  a_ <- lme4::fixef(mod_xc)
  a <- mermods[outcome_name%in%yvarz[y0]&grepl("RE pop",model_name),b]
  a_l <- mermods[outcome_name%in%yvarz[y0]&grepl("RE pop",model_name),l]
  a_u <- mermods[outcome_name%in%yvarz[y0]&grepl("RE pop",model_name),u]
  b <- ranef(mod_xc, condVar=TRUE)
  e <- attr(b$YRMO, "postVar") %>% sqrt() %>% .[2,2,] 
  ci_l <- (b$YRMO[2]+a_[2])-(e*1.96)
  mean_ <- (b$YRMO[2]+a_[2])
  ci_u <- (b$YRMO[2]+a_[2])+(e*1.96)
  
  # Make FIgure A3.3
  w_ <- 8
  png(paste0("results/figA3_3_",letters[y0],".png"),width=w_,height=w_/1.618/2,units="in",res=300)
  par(mar=c(2,2.5,.5,.5))
  plot(x=0,y=0,col=NA,ylim=range(unlist(c(ci_l,ci_u))),xlim=c(0,nrow(mean_)),bty="n",xaxt="n",xlab="",ylab="",yaxt="n")
  axis(2,at=pretty(range(unlist(c(ci_l,ci_u)))),las=2)
  axis(1,at=1:nrow(mean_),labels=NA)
  axis(1,at=xlabs[,which(grepl("01$",YRMO))],labels=xlabs[which(grepl("01$",YRMO)),substr(YRMO,1,4)],lwd.ticks=1.618)
  segments(x0=xlabs[,which(grepl("01$",YRMO))],x1=xlabs[,which(grepl("01$",YRMO))],y0=min(ci_l[,1])-sd(ci_l[,1]),y1=max(ci_u[,1])+sd(ci_u[,1]),lty=3,lwd=1/1.618)
  rect(xleft=0,xright=nrow(mean_)+1,ybottom=a_l,ytop=a_u,border=NA,col=rgb(.75,.75,.75,.25))
  segments(x0=0,x1=nrow(mean_)+1,y0=a,y1=a,col="grey",lwd=1.618)
  abline(h=0,lty=2)
  segments(x0=1:nrow(mean_)+.5,x1=1:nrow(mean_)+.5,y0=ci_l[,1],y1=ci_u[,1])
  points(x=1:nrow(mean_)+.5,y=mean_[,1],pch=16)
  # title(paste0("Impact of doubling NKVD presence on percent ",ylabz[y0]))
  dev.off()

},mc.cores=5) 




###########################
# Make Figure A3.4
###########################


# Source functions
rm(list=ls())
source("code/functions.R")

# Load data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% preprocez()

# Specify models
model_fe <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | DIV_ARMY + TID + bxid )
model_re <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|DIV_ARMY)+(1|TID)+(1|bxid))
model_fe_ <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | ARMY + TID + bxid )
model_re_ <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|ARMY)+(1|TID)+(1|bxid))

# Sensitivity analysis: first appearances by unit
# Subsample (first appearance of each unit in battle-month)
X_ix <- X_ %>% .[ECH_DIV==1,min(TID),by=c("UNIT_UA","bxid")] %>% data.table::setnames("V1","TID")
X_s <- X_ %>% .[paste(UNIT_UA,bxid,TID,sep="_")%in%X_ix[,paste(UNIT_UA,bxid,TID,sep="_")],] %>% .[!duplicated(paste(UNIT_UA,bxid,TID,sep="_")),]
mermods_1bat <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re,model_fe=model_fe)
mermods_1bat[,c("subset","size") := .("First of battle",nrow(X_s))]

# Subsample (first appearance of each unit in war)
X_ix <- X_ %>% .[ECH_DIV==1,min(TID),by=c("UNIT_UA")] %>% data.table::setnames("V1","TID")
X_s <- X_ %>% .[paste(UNIT_UA,TID,sep="_")%in%X_ix[,paste(UNIT_UA,TID,sep="_")],] %>% .[!duplicated(paste(UNIT_UA,TID,sep="_")),]
mermods_1war <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re_,model_fe=model_fe_)
mermods_1war[,c("subset","size") := .("First of war",nrow(X_s))]

# Combine
mermods <- dplyr::bind_rows(mermods_1bat,mermods_1war)
mermods[hausman_p<.05&grepl("FE",model_name),]
mermods[hausman_p>.05&grepl("RE",model_name),]

# Make Figure A3.4
remods_ <- mermods %>% .[grepl("FE pop",model_name),] %>% .[,outcome_name := factor(outcome_name, levels = rev(yvarz))] %>% .[,weighted := model_name %>% gsub("pop weights","weighted",.) %>% gsub("^RE ","",.)] %>% .[,weighted := factor(weighted)]  %>% mutate_at(names(.) %>% grep("^icc",.,value=T),as.numeric) %>% setDT() %>% .[,outcome_label := outcome_name %>% match(yvarz) %>% ylabz[.] %>% factor(., levels = rev(ylabz))] 
cbPalette <- c("grey60","black")
p <- ggplot2::ggplot(remods_,  ggplot2::aes(colour=subset)) +
    ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
    ggplot2::geom_pointrange(ggplot2::aes(x = outcome_label, y = b, ymin = l, ymax = u), lwd = 1/1.618/2, position = ggplot2::position_dodge(width = 1/2), shape = 16, fatten=1.618, fill = "WHITE")  + 
    ggplot2::theme_bw() + ggplot2::theme(aspect.ratio=1/1.618,legend.position = c(0.8, 0.2), legend.direction = "horizontal", legend.title = ggplot2::element_blank(), axis.title.x=ggplot2::element_blank(), axis.title.y = ggplot2::element_blank(),title=ggplot2::element_blank()) + ggplot2::coord_flip() + ggplot2::xlab("") + ggplot2::ylab("") + 
    ggplot2::guides(col = ggplot2::guide_legend(reverse=TRUE,ncol = 1, byrow = FALSE,override.aes = list(linetype = rep("blank", 2), shape = rep(16, 2)))) + ggplot2::scale_colour_manual(values=cbPalette)
w_ <- 3.25*1.618; h_ <- 2.9
png("results/figA3_4.png", width = w_, height = h_, units = "in", res=300) 
p
dev.off()



###########################
# Make Figure A3.5
###########################


rm(list=ls())
source("code/functions.R")

# Load and preprocess data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% preprocez() 

# Models
model_fe <- Formula::Formula(y ~  log(NKVD_PROP+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | DIV_ARMY + TID + bxid )
model_re <- as.formula(y ~ log(NKVD_PROP+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|DIV_ARMY)+(1|TID)+(1|bxid))

# Prepare temp directory
m_dir <- tempdir()
system(paste0("rm -r ",m_dir,"/*"))
dir(m_dir)

# Loop over DV's
nsim <- 1000
for(m0 in 1:nsim){
  print(paste0(m0,"/",nsim))
  set.seed(m0)
  m_all <- m_dir %>% paste0("/",m0)

  # Loop over DV's
  X_s <- X_ %>% .[ECH_DIV==1,] %>% .[,NKVD_PROP := NKVD_OO*15000/runif(.N,15000*.4,15000)] %>% .[NKVD_OO>0]
  {
    t1 <- Sys.time()
    mermods <- parallel::mclapply(1:length(yvarz),function(y0){
      # FE pop
      mod_fe <- lfe::felm(model_fe, data = X_s[,c("y","weights"):=.(get(yvarz[y0]),N_SOLDIERS)], weights = X_s$weights, exactDOF=T); summary(mod_fe)
      grp_fe <- mod_fe$fe %>% sapply(function(x){length(levels(x))})%>%.[order(names(.))]%>%t()%>%data.table::as.data.table()%>%data.table::setnames(paste0("N_",names(.)))
      betas <- summary(mod_fe)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
      other_betas <- summary(mod_fe)["coefficients"][[1]] %>% .[!grepl("NKVD",row.names(.)),]
      row.names(other_betas) <- row.names(other_betas)%>%gsub("^\\(|\\bscale\\(|\\blog\\(|\\)| \\+ 1","",.)
      other_betas <- lapply(1:nrow(other_betas),function(i0){
          out <- data.table::data.table(b=other_betas[i0,1],l=other_betas[i0,1]-other_betas[i0,2]*1.96,u=other_betas[i0,1]+other_betas[i0,2]*1.96)%>%data.table::setnames(paste0(names(.),"_",row.names(other_betas)[i0]))
      })%>% dplyr::bind_cols()
      m6 <- data.table(outcome_name=yvarz[y0],model_name="FE pop weights",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68,aic=stats::AIC(mod_fe),bic=stats::BIC(mod_fe)) %>% dplyr::bind_cols(other_betas,grp_fe)%>%dplyr::select(names(.)[1:7],names(other_betas),dplyr::everything())
      m6
    },mc.cores=7) %>% dplyr::bind_rows()
    t2 <- Sys.time(); print(t2-t1)
  }
  data.table::fwrite(mermods,m_all)
}
mermods_ <- lapply(1:nsim,function(f0){data.table::fread(m_dir %>% paste0("/",f0))}) %>% dplyr::bind_rows()

# Compare to base specification
model_re_ <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|DIV_ARMY)+(1|TID)+(1|bxid))
model_fe_ <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | DIV_ARMY + TID + bxid )

# Estimate base models
X_s <- X_[ECH_DIV==1,] %>% .[!duplicated(paste(UNIT_UA,bxid,TID,sep="_")),]
mermods <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re_,model_fe=model_fe_)

# Density plot
w_ <- 2*1.618*.7; h_ <- 2*.9
fnamz <- "figA3_5"
fig_dir <- "results/"
for(y0 in 1:length(yvarz)){
  mod1 <- mermods_[outcome_name%in%yvarz[y0]&grepl("FE po",model_name),] %>% .[order(b)] %>% .[,sim := .I]
  mod1_base <- mermods[outcome_name%in%yvarz[y0]&grepl("FE po",model_name),]
  p <- ggplot2::ggplot(mod1) +
      ggplot2::geom_density(ggplot2::aes(y=b), alpha=.2, fill="white") + 
      ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
      ggplot2::geom_hline(yintercept = mod1_base[,b], colour = "blue", lty = 1,size=.5) +
      ggplot2::theme_bw() + ggplot2::theme(aspect.ratio=1/1.618, axis.title.x=ggplot2::element_blank(), axis.title.y = ggplot2::element_blank()) + ggplot2::coord_flip() + ggplot2::xlab("Density") + ggplot2::ylab("") +
      ggplot2::labs(title = paste0(ylabz[y0]))  
  png(paste0(fig_dir,fnamz,"_",sprintf("%02d", y0),".png"), width = w_, height = h_, units = "in",res=300)
  print(p)
  dev.off()
}




###########################
# Make Tables A3.5, A3.6
###########################

rm(list=ls())
source("code/functions.R")

# Load and preprocess data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% preprocez()

# Leave-one-out lags
X_ <- X_ %>% 
  .[!is.na(DIV_ARMY)&DIV_ARMY!="", eval(gsub("_100|KVD","_LOOF",c(yvarz,"NKVD_ALL","NKVD_OO"))):=lapply(.SD,function(y){sapply(DIV_ARMY, function(x){mean(y[-match(x,DIV_ARMY)])})}) , by=.(FRONT, YRMO), .SDcols=c(yvarz,"NKVD_ALL","NKVD_OO")] %>% 
  .[!is.na(DIV_ARMY)&DIV_ARMY!="", eval(gsub("_100|KVD","_LOOA",c(yvarz,"NKVD_ALL","NKVD_OO"))):=lapply(.SD,function(y){sapply(DIV_ARMY, function(x){mean(y[-match(x,DIV_ARMY)])})}) , by=.(ARMY, YRMO), .SDcols=c(yvarz,"NKVD_ALL","NKVD_OO")] %>% 
  .[!is.na(DIV_ARMY)&DIV_ARMY!="", eval(gsub("_100|KVD","_LOO",c(yvarz,"NKVD_ALL","NKVD_OO"))):=lapply(.SD,function(y){sapply(DIV_ARMY, function(x){mean(y[-match(x,DIV_ARMY)])})}) , by=.(YRMO), .SDcols=c(yvarz,"NKVD_ALL","NKVD_OO")] %>% .[,eval(grep("_LOO",names(.),value=TRUE)%>%paste0("_t")):=lapply(.SD,data.table::shift),.SDcols=grep("_LOO",names(.),value=TRUE),by=DIV_ARMY]

# Subset
X_s <- X_[ECH_DIV==1,] %>% .[!duplicated(paste(UNIT_UA,bxid,TID,sep="_")),]

# Types of LOO
looz <- c("A","F")

#

# Baseline estimates
for(l0 in looz){

  # Peer effects model
  model_re <- as.formula(paste0("y ~ log(NKVD_OO+1) + log(N_LOO",l0,"_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|DIV_ARMY)+(1|TID)+(1|bxid)"))
  model_fe <- Formula::Formula(as.formula(paste0("y ~  log(NKVD_OO+1) + log(N_LOO",l0,"_OO+1)+ scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | DIV_ARMY + TID + bxid ")))

  # Baseline
  mermods <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re,model_fe=model_fe,loo_type=l0)
  mermods[hausman_p<.05&grepl("FE pop",model_name),1:5]
  mermods[hausman_p>=.05&grepl("RE pop",model_name),]

  # Bootstrapped SEs
  nsim <- 500
  y0 <- 1
  boot_out <- lapply(1:nsim,function(i0){print(paste0("Peer effects ",l0,". ",i0,"/",nsim))
    X_boot <- X_s[sample(.N,replace=TRUE)]
    mermods_boot <- parallel::mclapply(1:length(yvarz),function(y0){
      # FE pop
      mod_fe <- lfe::felm(model_fe, data = X_boot[,c("y","weights"):=.(get(yvarz[y0]),N_SOLDIERS)], weights = X_s$weights, exactDOF=T); summary(mod_fe)
      grp_fe <- mod_fe$fe %>% sapply(function(x){length(levels(x))})%>%.[order(names(.))]%>%t()%>%data.table::as.data.table()%>%data.table::setnames(paste0("N_",names(.)))
      betas <- summary(mod_fe)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
      other_betas <- summary(mod_fe)["coefficients"][[1]] %>% .[!grepl("NKVD",row.names(.)),]
      row.names(other_betas) <- row.names(other_betas)%>%gsub("^\\(|\\bscale\\(|\\blog\\(|\\)| \\+ 1","",.)
      other_betas <- lapply(1:nrow(other_betas),function(i0){
          out <- data.table::data.table(b=other_betas[i0,1],l=other_betas[i0,1]-other_betas[i0,2]*1.96,u=other_betas[i0,1]+other_betas[i0,2]*1.96)%>%data.table::setnames(paste0(names(.),"_",row.names(other_betas)[i0]))
      })%>% dplyr::bind_cols()
      m6 <- data.table(outcome_name=yvarz[y0],model_name="FE pop weights",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68,aic=stats::AIC(mod_fe),bic=stats::BIC(mod_fe)) %>% dplyr::bind_cols(other_betas,grp_fe)%>%dplyr::select(names(.)[1:7],names(other_betas),dplyr::everything()) %>% .[,rho_hat := get(paste0("b_N_LOO",l0,"_OO"))/(b+get(paste0("b_N_LOO",l0,"_OO"))) ] %>% .[,beta_hat := b] %>% .[,psi_hat:=get(paste0("b_N_LOO",l0,"_OO"))] %>% .[,.SD,.SDcols=c("outcome_name","model_name","beta_hat","psi_hat","rho_hat")]
      m6
    },mc.cores=7)%>%dplyr::bind_rows()
    mermods_boot
  }) %>% dplyr::bind_rows() 

  # Extract estimates
  mod_mean <- mermods[grepl("FE pop",model_name),.(b,get(paste0("b_N_LOO",l0,"_OO"))),by=c("outcome_name","model_name")]%>%data.table::copy()%>%data.table::setnames(c("b","V2"),c("beta_mean","psi_mean"))%>%.[,rho_mean:=psi_mean/(beta_mean+psi_mean)]
  boot_mean <- boot_out %>% .[,lapply(.SD,median),by=c("outcome_name","model_name"),.SDcols=c("beta_hat","psi_hat","rho_hat")] %>% data.table::setnames(c("beta_hat","psi_hat","rho_hat"),c("beta_mean","psi_mean","rho_mean"))
  boot_sd <- boot_out %>% .[,lapply(.SD,sd),by=c("outcome_name","model_name"),.SDcols=c("beta_hat","psi_hat","rho_hat")] %>% data.table::setnames(c("beta_hat","psi_hat","rho_hat"),c("beta_sd","psi_sd","rho_sd"))
  mod_l <- mermods[grepl("FE pop",model_name),.(l,get(paste0("l_N_LOO",l0,"_OO"))),by=c("outcome_name","model_name")]%>%data.table::copy()%>%data.table::setnames(c("l","V2"),c("beta_l","psi_l"))
  boot_l <- boot_out %>% .[,lapply(.SD,quantile,probs=.025),by=c("outcome_name","model_name"),.SDcols=c("beta_hat","psi_hat","rho_hat")] %>% data.table::setnames(c("beta_hat","psi_hat","rho_hat"),c("beta_l","psi_l","rho_l"))
  mod_u <- mermods[grepl("FE pop",model_name),.(u,get(paste0("u_N_LOO",l0,"_OO"))),by=c("outcome_name","model_name")]%>%data.table::copy()%>%data.table::setnames(c("u","V2"),c("beta_u","psi_u"))
  boot_u <- boot_out %>% .[,lapply(.SD,quantile,probs=.975),,by=c("outcome_name","model_name"),.SDcols=c("beta_hat","psi_hat","rho_hat")] %>% data.table::setnames(c("beta_hat","psi_hat","rho_hat"),c("beta_u","psi_u","rho_u"))
  boot_est <- merge(mod_mean,boot_sd,by=c("outcome_name","model_name")) %>% merge(boot_l,by=c("outcome_name","model_name")) %>% merge(boot_u,by=c("outcome_name","model_name")) %>% .[,outcome_name:=factor(outcome_name,levels=yvarz)]%>%.[order(outcome_name)] %>% .[,c("beta_l","psi_l"):=outcome_name %>% match(mod_l[,outcome_name]) %>% mod_l[.,.(beta_l,psi_l)]] %>% .[,c("beta_u","psi_u"):=outcome_name %>% match(mod_u[,outcome_name]) %>% mod_u[.,.(beta_u,psi_u)]] %>% .[,rho_mean := outcome_name %>% match(boot_mean[,outcome_name]) %>% boot_mean[.,rho_mean]]

  # Labels and captions
  labz <- data.table(rn=c("beta","psi","rho","b","ci","reml","icc_DIV_ARMY","icc_bxid","icc_TID","icc_Residual","hausman_p","aic","bic","N_DIV_ARMY","N_bxid","N_TID","n"),rn_lab=c("Direct effect","Reduced form peer effect","Endogenous effect","Estimate","95% CI","REML","ICC (unit)","ICC (battle)","ICC (month)","ICC (residual)","Hausman p","AIC","BIC","Unit FE","Battle FE","Month FE","N"))
  xlabz <- data.table::data.table(rn=c("beta","psi","rho"),rn_lab=c("Direct effect","Reduced form peer effect","Endogenous effect"))
  capz_re <- paste0("\\textbf{Coefficient Estimates for Peer Effects Model (random effects)",ifelse(l0%in%"A"," (within-army peer effects)}. ",ifelse(l0%in%"F"," (within-front peer effects)}. "," (RKKA-wide peer effects)}. ")),"Bootstrapped 95\\% confidence intervals in parentheses. Observations weighted by number of discharge records per unit-month. Null hypothesis for Hausman test: random effects model is consistent."); lbz_fe <- "tab:re_peer"%>%paste0(l0)
  capz_fe <- paste0("\\textbf{Coefficient Estimates for Peer Effects Model",ifelse(l0%in%"A"," (within-army peer effects)}. ",ifelse(l0%in%"F"," (within-front peer effects)}. "," (RKKA-wide peer effects)}. ")),"Linear fixed effect model estimates. Bootstrapped 95\\% confidence intervals in parentheses. Observations weighted by number of discharge records per unit-month. Null hypothesis for Hausman test: random effects model is consistent."); lbz_fe <- "tab:fe_peer"%>%paste0(l0)

  # Fixed effects
  fnz <- paste0("tabA3_",4+which(looz==l0),".tex")
  boot_tab <- boot_est %>% .[grep("FE pop",model_name),.(outcome_name,SUNGEO::smart_round(beta_mean,1),paste0("(",SUNGEO::smart_round(beta_l,1),",",SUNGEO::smart_round(beta_u,1),")"),SUNGEO::smart_round(psi_mean,1),paste0("(",SUNGEO::smart_round(psi_l,1),",",SUNGEO::smart_round(psi_u,1),")"),SUNGEO::smart_round(rho_mean,1),paste0("(",SUNGEO::smart_round(rho_l,1),",",SUNGEO::smart_round(rho_u,1),")"))] %>% data.table::setnames(c("V2","V3","V4","V5","V6","V7"),c("beta","beta_ci","psi","psi_ci","rho","rho_ci")) %>% dplyr::bind_cols(mermods %>% .[grep("FE pop",model_name),.(hausman_p,aic,N_DIV_ARMY,N_bxid,N_TID,n)])
  regtab <- boot_tab %>% .[,hausman_p := hausman_p%>%(function(x){ifelse(x<0.001,"<0.001",SUNGEO::smart_round(x))})] %>% .[,aic := aic %>% SUNGEO::smart_round(1)]  %>% .[,.(outcome_name,beta,beta_ci,psi,psi_ci,rho,rho_ci,hausman_p,aic,N_DIV_ARMY,N_bxid,N_TID,n)] %>% dplyr::select(tidyselect::where(~ any(!is.na(.)))) %>% dplyr::select(tidyselect::where(~ any(.!="")))%>% .[,outcome_name := outcome_name %>% match(yvarz) %>% ylabz_short[.]] %>% t() %>% data.table::as.data.table(keep.rownames=T) %>% data.table::setnames(.[1,] %>% as.character()) %>% .[-1,] %>% .[,outcome_name := outcome_name %>% match(labz[,rn]) %>% labz[.,rn_lab]]
  regtab %>% xtable::xtable(caption=capz_fe,label=lbz_fe,align=paste0("lr",paste0(rep("c",ncol(regtab)-1),collapse=""))) %>% print(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(0,6),sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=tempfile(),size="footnotesize",caption.placement="top") %>% gsub("\\begin{tabular}","\\makebox[\\textwidth]{\\begin{tabular}",., fixed = TRUE) %>% gsub("\\end{tabular}","\\end{tabular}}",., fixed = TRUE) %>% cat(sep = "\n", file=paste0("results/",fnz))
}



###########################
# Make Figure A3.6
###########################

rm(list=ls())
source("code/functions.R")

# Load data
X <- readRDS("data/data_divisionlevel.RDS")

# Preprocess officers and enlisted
X_O <- preprocez(X,ranks="O") %>% .[ECH_DIV==1,] %>% .[!duplicated(paste(UNIT_UA,bxid,TID,sep="_")),]
X_A <- preprocez(X,ranks="ALL") %>% .[ECH_DIV==1,] %>% .[!duplicated(paste(UNIT_UA,bxid,TID,sep="_")),]

# Models
model_re <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + (1|DIV_ARMY)+(1|TID)+(1|bxid))
model_fe <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | DIV_ARMY + TID + bxid )

#

# Run models
X_s <- X_O %>% data.table::copy()
mermods_O <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re,model_fe=model_fe) %>% .[,sample:="Officers"]
X_s <- X_A %>% data.table::copy()
mermods_A <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re,model_fe=model_fe) %>% .[,sample:="All ranks"]
mermods <- dplyr::bind_rows(mermods_O,mermods_A)

# CI plot (simplified)
remods_ <- mermods %>% .[grepl("FE pop",model_name),] %>% .[,outcome_name := factor(outcome_name, levels = rev(yvarz))]  %>% dplyr::mutate_at(names(.) %>% grep("^icc",.,value=T),as.numeric) %>% setDT() %>% .[,outcome_label := outcome_name %>% match(yvarz) %>% ylabz[.] %>% factor(., levels = rev(ylabz))] %>% .[,sample:=factor(sample)]
w_ <- 3.25*1.618; h_ <- 2.9
cbPalette <- c("grey60","black")
png("results/figA3_6.png", width = w_, height = h_, units = "in", res=300) 
ggplot2::ggplot(remods_,  ggplot2::aes(colour=sample)) +
    ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
    ggplot2::geom_pointrange(ggplot2::aes(x = outcome_label, y = b, ymin = l, ymax = u), lwd = 1/1.618/2, position = ggplot2::position_dodge(width = 1/2), shape = 16, fatten=1.618, fill = "WHITE")  + 
    ggplot2::theme_bw() + ggplot2::theme(aspect.ratio=1/1.618,legend.position = c(0.8, 0.2), legend.direction = "horizontal", legend.title = ggplot2::element_blank(), axis.title.x=ggplot2::element_blank(), axis.title.y = ggplot2::element_blank(),title=ggplot2::element_blank()) + ggplot2::coord_flip() + ggplot2::xlab("") + ggplot2::ylab("") + 
    ggplot2::guides(col = ggplot2::guide_legend(reverse=TRUE,ncol = 1, byrow = FALSE,override.aes = list(linetype = rep("blank", 2), shape = rep(16, 2)))) + ggplot2::scale_colour_manual(values=cbPalette)
dev.off()



###########################
# Tables A3.7, A3.8
###########################

rm(list=ls())
source("code/functions.R")

# Load data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% preprocez()

# Add time covariates
X_ <- X_ %>% .[,FIRST_MONTH := 1*(YRMO%in%min(YRMO)),by=UNIT_UA] %>% .[,DEPLOY_LENGTH := as.integer(factor(YRMO,sort(unique(YRMO))))-as.integer(factor(min(YRMO),sort(unique(YRMO)))),by=UNIT_UA]

# Subset divisions
X_s <- X_ %>% .[ECH_DIV==1,] %>% .[!duplicated(paste(UNIT_UA,bxid,TID,sep="_")),]

# Models (with first month dummy)
model_re <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + FIRST_MONTH + (1|DIV_ARMY)+(1|TID)+(1|bxid))
model_fe <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + FIRST_MONTH | DIV_ARMY + TID + bxid )
mermods_a <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re,model_fe=model_fe)
mermods_a[hausman_p<.05&grepl("FE pop",model_name),]
mermods_a[hausman_p>=.05&grepl("RE",model_name),]

# Models (with deployment duraion spline)
model_re <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + splines::bs(DEPLOY_LENGTH) + (1|DIV_ARMY)+(1|TID)+(1|bxid))
model_fe <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) + splines::bs(DEPLOY_LENGTH) | DIV_ARMY + TID + bxid )
mermods_b <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re,model_fe=model_fe)
mermods_b[hausman_p<.05&grepl("FE pop",model_name),]
mermods_b[hausman_p>=.05&grepl("RE",model_name),]

# Table labels and captions
labz <- data.table(rn=c("b","ci","reml","icc_DIV_ARMY","icc_bxid","icc_TID","icc_Residual","hausman_p","aic","bic","N_DIV_ARMY","N_bxid","N_TID","n"),rn_lab=c("Estimate","95% CI","REML","ICC (unit)","ICC (battle)","ICC (month)","ICC (residual)","Hausman p","AIC","BIC","Unit FE","Battle FE","Month FE","N"))
xlabz <- data.table::data.table(rn=c("FIRST_MONTH","DEPLOY_LENGTH"),rn_lab=c("First Month","Dep.Duration"))
capz_fe_a <- "\\textbf{Coefficient Estimates with ``First Month'' Dummies}. Estimates for other covariates not shown. Observations weighted by number of discharge records per unit-month. Null hypothesis for Hausman test: random effects model is consistent."; lbz_fe_a <- "tab:fe_tdum"
capz_fe_b <- "\\textbf{Coefficient Estimates with ``Deployment Duration'' Cubic Spline}. Estimates for other covariates not shown. Observations weighted by discharge records per unit-month. Null hypothesis for Hausman test: random effects model is consistent."; lbz_fe_b <- "tab:fe_tspl"

# Make Table A3.7
dirr <- "results/"
fnz_a <- "tabA3_7.tex"
regtab <- mermods_a %>% .[grepl("FE pop",model_name),] %>% .[,ci := paste0("(",l%>%SUNGEO::smart_round(1),",",u%>%SUNGEO::smart_round(1),")")] %>% .[,hausman_p := hausman_p%>%(function(x){ifelse(x<0.001,"<0.001",smart_round(x))})] %>% .[,c("b","N_DIV_ARMY","N_bxid","N_TID","n") := lapply(.SD,SUNGEO::smart_round,rnd=3),.SDcols=c("b","N_DIV_ARMY","N_bxid","N_TID","n")] %>% .[,aic := aic %>% SUNGEO::smart_round(1)]  %>% .[,.(outcome_name,b,ci,hausman_p,aic,N_DIV_ARMY,N_bxid,N_TID,n)] %>% dplyr::select(tidyselect::where(~ any(!is.na(.)))) %>% dplyr::select(tidyselect::where(~ any(.!="")))%>% .[,outcome_name := outcome_name %>% match(yvarz) %>% ylabz_short[.]] %>% t() %>% data.table::as.data.table(keep.rownames=T) %>% data.table::setnames(.[1,] %>% as.character()) %>% .[-1,] %>% .[,outcome_name := outcome_name %>% match(labz[,rn]) %>% labz[.,rn_lab]]
regtab_x <- lapply(xlabz[,which(rn=="FIRST_MONTH")],function(i0){
  xrow <- mermods_a %>% .[grepl("FE pop",model_name),] %>% .[,.SD,.SDcols=paste0(c("b_","l_","u_"),xlabz[i0,rn]),by=outcome_name] %>% .[!outcome_name%in%c("OK_100","DEF_100","TRE_100","GMED_100")] %>% .[,ci := paste0("(",get(paste0("l_",xlabz[i0,rn]))%>%SUNGEO::smart_round(1),",",get(paste0("u_",xlabz[i0,rn]))%>%SUNGEO::smart_round(1),")")] %>% data.table::setnames("ci",paste0("ci_",xlabz[i0,rn]))%>%.[,eval(paste0("b_",xlabz[i0,rn])):=get(paste0("b_",xlabz[i0,rn]))%>%SUNGEO::smart_round(1)]%>%dplyr::select(paste0(c("b_","ci_"),xlabz[i0,rn]))
  xrow
})%>%dplyr::bind_cols()%>%.[,.SD,.SDcols=!grepl("Intercept",names(.))]%>%t() %>% as.data.table(keep.rownames=T)%>%data.table::setnames(names(regtab))%>%.[,outcome_name := outcome_name %>% match(xlabz[,paste0("b_",rn)]) %>% xlabz[.,rn_lab]] %>% .[is.na(outcome_name),outcome_name := ""]
regtab_out_a <- list(regtab[1:2],regtab_x,regtab[3:.N])%>%data.table::rbindlist() %>% xtable::xtable(caption=capz_fe_a,label=lbz_fe_a,align=paste0("lr",paste0(rep("c",ncol(regtab)-1),collapse=""))) %>% print(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(0,2,2+nrow(regtab_x)),sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=paste0(dirr,fnz_a),size="footnotesize",caption.placement="top")

# Make Table A3.8
fnz_b <- "tabA3_8.tex"
regtab <- mermods_b %>% .[grepl("FE pop",model_name),] %>% .[,ci := paste0("(",l%>%SUNGEO::smart_round(1),",",u%>%SUNGEO::smart_round(1),")")] %>% .[,hausman_p := hausman_p%>%(function(x){ifelse(x<0.001,"<0.001",SUNGEO::smart_round(x))})] %>% .[,c("b","N_DIV_ARMY","N_bxid","N_TID","n") := lapply(.SD,SUNGEO::smart_round,rnd=3),.SDcols=c("b","N_DIV_ARMY","N_bxid","N_TID","n")] %>% .[,aic := aic %>% SUNGEO::smart_round(1)]  %>% .[,.(outcome_name,b,ci,hausman_p,aic,N_DIV_ARMY,N_bxid,N_TID,n)] %>% dplyr::select(tidyselect::where(~ any(!is.na(.)))) %>% dplyr::select(tidyselect::where(~ any(.!="")))%>% .[,outcome_name := outcome_name %>% match(yvarz) %>% ylabz_short[.]] %>% t() %>% data.table::as.data.table(keep.rownames=T) %>% data.table::setnames(.[1,] %>% as.character()) %>% .[-1,] %>% .[,outcome_name := outcome_name %>% match(labz[,rn]) %>% labz[.,rn_lab]]
regtab_x <- {
  mermods_b %>% .[grepl("FE pop",model_name),] %>% .[!outcome_name%in%c("OK_100","DEF_100","TRE_100","GMED_100")] %>% .[,check := "\\checkmark"]%>%dplyr::select("check") %>% data.table::setnames("check","Duration Spline")
  }%>%.[,.SD,.SDcols=!grepl("Intercept",names(.))]%>%t() %>% as.data.table(keep.rownames=T)%>%data.table::setnames(names(regtab))
regtab_out_b <- list(regtab[1:2],regtab_x,regtab[3:.N])%>%data.table::rbindlist() %>% xtable::xtable(caption=capz_fe_b,label=lbz_fe_b,align=paste0("lr",paste0(rep("c",ncol(regtab)-1),collapse=""))) %>% print(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(0,2,2+nrow(regtab_x)),sanitize.text.function=function(x){identity(gsub("95%","95\\%",x,fixed=TRUE))},sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=paste0(dirr,fnz_b),size="footnotesize",caption.placement="top")




##############################################################################
##############################################################################
##############################################################################
## Evidence from Matched Soviet Rifle Divisions
##
## Table 1: Paired Comparison: Battle of Leningrad (9 July–26 Oct.1941)
## Table A4.9: Covariate balance statistics, pre- and post-matching
## Figure A4.7: Impact of NKVD Presence on Battlefield Outcomes, Matched Sample.
## Table A4.10: Unit Pairs with Largest Disparities in NKVD Presence (Defensive Battles).
## Table A4.11: Unit Pairs with Largest Disparities in NKVD Presence (Offensive Battles).
##############################################################################
##############################################################################
##############################################################################

print("Running: code chunk 2")
# Clear workspace, source functions
rm(list=ls())
source("code/functions.R")

##################################
# Load and prepare data
##################################

# Load and preprocess data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% .[NKVD_ALL>0,] %>% preprocez(.)

# Add numeric IDs
X_[,UNITID_NUM := UNIT_UA %>% as.factor() %>% as.numeric()]
X_[,FRONT_NUM := FRONT %>% as.factor() %>% as.numeric()]
X_[,ARMY_NUM := ARMY %>% as.factor() %>% as.numeric()]
X_[,BX_NUM := bxid %>% as.factor() %>% as.numeric()]
X_[,YRMO_NUM := as.Date(paste0(as.character(YRMO), '01'), format='%Y%m%d') %>% as.factor() %>% as.integer()]

# Add binary Treatment
X_[,NKVD_OO_b := 1*(NKVD_OO > mean(NKVD_OO))]
X_[,lNKVD_OO_b := 1*(log(NKVD_OO+1) > mean(log(NKVD_OO+1)))]
tvarz <- c("lNKVD_OO_b","NKVD_OO_b")

# New variables
X_[,TYPE_NUM := as.factor(UNIT_TYPE) %>% as.numeric()]

# Quantiles
X_[,N_QT := cut(N_SOLDIERS, quantile(N_SOLDIERS, probs=0:4/4), include.lowest=TRUE, labels=FALSE)] 
X_[,REP_QT := cut(M_10km_ADM3, quantile(M_10km_ADM3, probs=0:4/4,na.rm=TRUE), include.lowest=TRUE, labels=FALSE)] 

# # Specify variables to match on
xvarz_ <- c("russian_jw","soldiers_dist","AGE_1941","POP_DENSITY","N_SOLDIERS","N_QT","M_10km_ADM3","REP_QT")
xvarz <- c("REP_QT")


##################################
# Match
##################################

# Match
matchvar <- c("ARMY_NUM","TYPE_NUM","TID","BX_NUM","N_QT","REP_QT")
exactz <- !grepl(yvarz[1],matchvar)
YTX <- X_[,.SD,.SDcols=c("UNIT_UA","YRMO","bat_name","FRONT",tvarz,matchvar,yvarz)] %>% na.omit()
suppressWarnings({
  m.pairs <- Matching::Match(Tr = YTX[,get(tvarz[1])],X = YTX[,.SD,.SDcols=matchvar],exact=exactz,version="fast",ties=F, replace=F)
})
summary(m.pairs)

# Extract ids
id_matched <- data.table(id_t=YTX[m.pairs$index.treated,UNIT_UA],id_c=YTX[m.pairs$index.control,UNIT_UA],yrmo=YTX[m.pairs$index.treated,YRMO],front=YTX[m.pairs$index.treated,FRONT],battle=YTX[m.pairs$index.treated,bat_name],bxid=YTX[m.pairs$index.treated,BX_NUM]) %>% .[,n_t:=.N, by=id_t] %>% .[,n_c:=.N, by=id_c]

##################################
# Covariate balance
##################################

# Balance on pre-T covariates
balvarz <- c("ARMY_NUM","TYPE_NUM","TID","BX_NUM","N_SOLDIERS","russian_jw","AGE_1941","soldiers_dist","POP_DENSITY","CROPLAND_WITHIN_5km","URBAN_PCT","M_10km_ADM3")
ballabz <- c("Army ID","Unit type","Month","Battle ID","Number of Casualties","Proportion Russian","Soldiers' Age","Geographic Diversity","Population Density","Cropland","Urbanization","Pre-War Repression Exposure")
YTX_ <- merge(YTX,X_[,.SD,.SDcols=c("UNIT_UA","YRMO",balvarz[!balvarz%in%names(YTX)])],by=c("UNIT_UA","YRMO"),all.x=T,all.y=F)
for(j in balvarz){data.table::set(YTX_,which(is.na(YTX_[[j]])),j,-99)}
rnd <- 3
set.seed(123); m.bal <- Matching::MatchBalance(as.formula(paste0("NKVD_OO_b ~ ",paste0(balvarz,collapse="+"))), data=YTX_, match.out=m.pairs, nboots=1000)

# Balance table
match_bal <- lapply(1:(length(balvarz)),function(i0){#print(i0)
  data.table::data.table(
    X=c(ballabz[i0],""),
    match=c("pre-matching","post-matching"),
    mean_T=c(m.bal$BeforeMatching[i0][[1]]$mean.Tr %>% SUNGEO::smart_round(rnd),
      m.bal$AfterMatching[i0][[1]]$mean.Tr %>% SUNGEO::smart_round(rnd)),
    mean_C=c(m.bal$BeforeMatching[i0][[1]]$mean.Co %>% SUNGEO::smart_round(rnd),
      m.bal$AfterMatching[i0][[1]]$mean.Co %>% SUNGEO::smart_round(rnd)),
    std_diff=c({(m.bal$BeforeMatching[i0][[1]]$mean.Tr-m.bal$BeforeMatching[i0][[1]]$mean.Co)/sqrt(m.bal$BeforeMatching[i0][[1]]$var.Tr)} %>% SUNGEO::smart_round(rnd),
      {(m.bal$AfterMatching[i0][[1]]$mean.Tr-m.bal$AfterMatching[i0][[1]]$mean.Co)/sqrt(m.bal$AfterMatching[i0][[1]]$var.Tr)} %>% SUNGEO::smart_round(rnd)),
    ks=c(m.bal$BeforeMatching[i0][[1]]$ks$ks$statistic %>% round(rnd) %>% paste0(.,{m.bal$BeforeMatching[i0][[1]]$ks$ks$p.value %>% findInterval(.,c(0,.001,.01,.05,.1,1)) %>% c("**","**","*","","","")[.]}),
      m.bal$AfterMatching[i0][[1]]$ks$ks$statistic %>% round(rnd) %>% paste0(.,
      {m.bal$AfterMatching[i0][[1]]$ks$ks$p.value %>% findInterval(.,c(0,.001,.01,.05,.1,1)) %>% c("**","**","*","","","")[.]}))
  )
}) %>% dplyr::bind_rows() %>% data.table::setnames(c("Covariate","Status","Mean T","Mean C","Std. Diff.","KS Statistic"))
match_bal

# Make Table A4.9
xmat <- xtable::xtable(match_bal,caption = "{\\textbf{Covariate Balance Statistics, Pre- and Post-Matching}.}",label="tab:match_bal",align=paste0("lp{2.5cm}p{3.5cm}p{1.75cm}p{1.75cm}p{1.75cm}p{1.75cm}")) %>% data.table::setnames(paste0("\\multicolumn{1}{c}{",names(.),"}")) 
for(r0 in c(1:nrow(xmat))){xmat[r0,] <- paste0("\\multicolumn{1}{c}{",xmat[r0,],"}")}
xmat <- xmat %>%  print(.,hline.after=NULL,add.to.row=list(pos=list(-1,0, nrow(match_bal)),command=c("\\toprule ","\\midrule ", "\\bottomrule ")),sanitize.colnames.function = identity,sanitize.text.function=identity,caption.placement="top",size="\\small",include.rownames=FALSE)
note <- paste("Standardized difference (Std. Diff.) is $\\frac{\\mbox{mean}(T)-\\mbox{mean}(C)}{\\mbox{sd}(T)}$. Significance levels (two-tailed): $^{*} p < 0.05$; $^{**} p < 0.01$.\n}", sep = "")
cat(gsub("\\end{tabular}\n", paste("\\end{tabular}\n\\caption*{{\\scriptsize ", note, "}\n", sep = ""), xmat, fixed = TRUE), sep = "\n", file =  "results/tabA4_9.tex")


##################################
# Case selection
##################################

# Extract matched pairs
x_matched <- id_matched %>% merge(X_[,.SD,.SDcols=c("UNIT_UA","YRMO","NKVD_ALL","NKVD_OO",yvarz,tvarz,xvarz_)] %>% data.table::setnames(c(yvarz,"NKVD_ALL","NKVD_OO",tvarz,xvarz_),paste0(c(yvarz,"NKVD_ALL","NKVD_OO",tvarz,xvarz_),"_t")),by.x=c("id_t","yrmo"),by.y=c("UNIT_UA","YRMO"),all.x=T,all.y=F) %>% merge(X_[,.SD,.SDcols=c("UNIT_UA","YRMO","NKVD_ALL","NKVD_OO",yvarz,tvarz,xvarz_)] %>% data.table::setnames(c(yvarz,"NKVD_ALL","NKVD_OO",tvarz,xvarz_),paste0(c(yvarz,"NKVD_ALL","NKVD_OO",tvarz,xvarz_),"_c")),by.x=c("id_c","yrmo"),by.y=c("UNIT_UA","YRMO"),all.x=T,all.y=F)

# Offensive & defensive battles
x_matched[,offense := 1*(grepl("наступ|Разгром|Ликвидация|Преслед",battle,ignore.case=T))]
x_matched[,defense := 1*(grepl("оборон|сандомир|Курск",battle,ignore.case=T)&!grepl("Сандомирско-Силезская наступательная операция",battle,ignore.case=T))]

# Sort by difference in NKVD presence
x_matched[,nkvd_diff := NKVD_OO_t-NKVD_OO_c]

# Top 10 cases
x_def_10 <- x_matched %>% .[defense==1] %>% .[order(nkvd_diff)%>%rev()%>%.[1:10],.(id_t,id_c,yrmo,battle,bxid,nkvd_diff,NKVD_OO_t,NKVD_OO_c)]
x_off_10 <- x_matched %>% .[offense==1] %>% .[order(nkvd_diff)%>%rev()%>%.[1:10],.(id_t,id_c,yrmo,battle,bxid,nkvd_diff,NKVD_OO_t,NKVD_OO_c)]


# Make Tables A4.10, A4.11
btrans <- function(x){x %>% gsub(" уд а$"," SA",.) %>% gsub(" а$"," A",.) %>% gsub(" сд "," RD ",.) %>% gsub(" тбр "," TB ",.) %>% gsub(" ибр "," EB ",.) %>% gsub(" сбр "," RB ",.) %>% gsub(" ск "," RC ",.) %>% gsub(" тк "," TC ",.) %>% gsub(" кк "," CC ",.) %>% gsub(" пабр "," MGAB ",.) %>% gsub(" минбр "," MORB ",.)  %>% gsub(" гв "," Guards ",.)}
batlabs <- data.table::data.table(bat_name=c("2-й удар. Разгром на правобережной Украине","6-й удар. Разгром немцев в Западной Украине","Контрнаступление советских войск под Москвой","Наступательные и оборонительные бои на Любаньском направлении.","Синявская наступательная операция.","Чернигов-Припятьская наступательная операция.","Битва под Сталинградом. Оборона советских войск.","Курская битва.","Оборона на западных и юго-западных подступах к Ленинграду.","Оборона перевалов центральной части главного Кавказского хребта 46-й Армией","Оборонительное сражение под Москвой.","Смоленское оборонительное сражение.", "5-й удар. Разгром немцев в Белоруссии","8-й удар. Разгром немцев в Прибалтике","Наступательная операция на харьковском направлении и контрудар немцев.","Берлинская наступательная операция.","Краснодарско-Новороссийская наступательная операция.","Преследование Невельской группировки немцев","Красносельско-Ропшинская наступательная операция и развитие ее на Псков."),bat_lab=c("Right-Bank Ukraine Offensive","Western Ukrainian Offensive","Moscow Counteroffensive","Battle of Lyuban'","Sinyavino Offensive","Chernigov-Pripyat' Offensive","Defensive Operations Near Stalingrad","Battle of Kursk","Defensive Operations South/Southwest of Leningrad","Defense of Central Passes of Main Caucasus Ridge","Defensive Operations South/Southwest of Leningrad","Smolensk Defensive Battle", "Belorussian Offensive","Baltic Offensive","Kharkov Offensive","Berlin Offensive","Krasnodar-Novorossiysk Offensive","Novosokolniki Offensive","Krasnoye Selo - Ropsha offensive"))

# Defensive
defbl <- data.table::data.table(bat_name=x_def_10[,battle %>% unique() %>% sort()],label=x_def_10[,battle %>% unique() %>% sort()]%>%match(batlabs[,bat_name])%>%batlabs[.,bat_lab])
x_def_10_ <- x_def_10[order(nkvd_diff) %>% rev(),.(battle,id_t %>% btrans(),id_c %>% btrans(),ceiling(NKVD_OO_t) %>% as.integer(),ceiling(NKVD_OO_c) %>% as.integer())] %>% data.table::setnames(c("battle","Unit (T)","Unit (C)","NKVD (T)","NKVD (C)")) %>% .[,Battle := battle %>% match(defbl[,bat_name]) %>% defbl[.,label]] %>% dplyr::select(Battle,dplyr::everything()) %>% .[,battle := NULL]
capz <- "\\textbf{Unit Pairs with Largest Disparities in NKVD Presence} (Defensive Battles)."; lbz <- "tab:match2_def"
fnz <- "tabA4_10.tex"
x_def_10_ %>% xtable::xtable(caption=capz,label=lbz,align=paste0("rrrrrr")) %>% print(include.rownames=TRUE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(-1,0,10),file=tempfile(),size="footnotesize",caption.placement="top") %>% gsub("\\begin{tabular}","\\makebox[\\textwidth]{\\begin{tabular}",., fixed = TRUE) %>% gsub("\\end{tabular}","\\end{tabular}}",., fixed = TRUE) %>% cat(sep = "\n", file=paste0("results/",fnz))

# Offensive
offbl <- data.table::data.table(bat_name=x_off_10[,battle %>% unique() %>% sort()],label=x_off_10[,battle %>% unique() %>% sort()]%>%match(batlabs[,bat_name])%>%batlabs[.,bat_lab])
x_off_10_ <- x_off_10[order(nkvd_diff) %>% rev(),.(battle,id_t %>% btrans(),id_c %>% btrans(),ceiling(NKVD_OO_t),ceiling(NKVD_OO_c))] %>% data.table::setnames(c("battle","Unit (T)","Unit (C)","NKVD (T)","NKVD (C)")) %>% .[,Battle := battle %>% match(offbl[,bat_name]) %>% offbl[.,label]] %>% dplyr::select(Battle,dplyr::everything()) %>% .[,battle := NULL]
capz <- "\\textbf{Unit Pairs with Largest Disparities in NKVD Presence} (Offensive Battles)."; lbz <- "tab:match2_off"
fnz <- "tabA4_11.tex"
x_off_10_ %>% xtable::xtable(caption=capz,label=lbz,align=paste0("rrrrrr")) %>% print(include.rownames=TRUE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(-1,0,10),file=tempfile(),size="footnotesize",caption.placement="top") %>% gsub("\\begin{tabular}","\\makebox[\\textwidth]{\\begin{tabular}",., fixed = TRUE) %>% gsub("\\end{tabular}","\\end{tabular}}",., fixed = TRUE) %>% gsub("\\.00\\b","",.) %>% cat(sep = "\n", file=paste0("results/",fnz))

# Make Table 1
x_cases <- x_matched %>% .[yrmo==194110&grepl("^90|^168 сд",id_t)&grepl("^90|^168 сд",id_c)] %>% (function(x){merge(x %>% .[,.SD,.SDcols=grep("_t$|^yrmo$",names(.),value=TRUE)] %>% data.table::transpose(keep.names="Variable")%>% .[grep("\\.\\d{1-}",V1),V1:=V1%>%as.numeric()%>%SUNGEO::smart_round(2)]%>%data.table::setnames("V1","Treated")%>%.[,Variable:=Variable%>%gsub("_t$","",.)],x %>% .[,.SD,.SDcols=grep("_c$|^yrmo$",names(.),value=TRUE)] %>% data.table::transpose(keep.names="Variable")%>% .[grep("\\.\\d{1-}",V1),V1:=V1%>%as.numeric()%>%SUNGEO::smart_round(2)]%>%data.table::setnames("V1","Control")%>%.[,Variable:=Variable%>%gsub("_c$","",.)],by="Variable")}) %>% .[,Variable:=factor(Variable,levels=c("id","N_SOLDIERS","NKVD_OO","KIA_100","WIA_100","MIA_100","POW_100","DES_100","PUN_100","WMED_100"))] %>% .[!is.na(Variable)] %>% .[order(Variable)] %>% .[grep("_OO$|_100$",Variable),Difference:=(as.numeric(Treated)-as.numeric(Control))%>%SUNGEO::smart_round(2)] %>% .[grep("_100$",Variable),c("Treated","Control","Difference"):=lapply(.SD,function(x){paste0(x,"%")}),.SDcols=c("Treated","Control","Difference")]
labmat <- data.table::data.table(varz=c("NKVD_OO","KIA_100","MIA_100","POW_100","PUN_100","WMED_100"),labz=c("NKVD OO/SMERSH","KIA","MIA","POW","Punish","Medals for Valor"))
manual_insert <- {"& & & \\\\
  \\underline{Exact Matching} & & & \\\\ 
  Front & Leningrad & Leningrad  & \\\\ 
  Army & 55th & 55th & \\\\ 
  Unit Type & Rifle Division & Rifle Division & \\\\ 
  & & & \\\\  
  \\underline{Additional Unit Traits} & & & \\\\ 
  Formation Date & 1939 & 1936  & \\\\ 
  Formation & Second & Second & \\\\ 
  Soldiers (\\emph{Approx.}) & 10,000--13,654  & 10,000--10,258  & 0--3,396 \\\\ 
  Artillery/Howitzers & 38 & 42 & 4 \\\\ 
  Anti-Aircraft Guns & 8 & 4 & 4 \\\\ 
  Anti-Tank Guns & 54 & 48 & 6 \\\\ 
  Vehicles & 771 & 690 & 81  \\\\ 
  Initial Front (Linear km) & 60--65 & 50--52  &  10--13  \\\\ 
  Force to Space Ratio (Linear km) &  167--210 &   198--200 & 21--10 \\\\ 
  Force to Force Ratio (USR:GER) & 1:2.5--1:3 & 1:2.5--1.3 & 0    \\\\ 
  Soldiers Per Vehicle & 13--18 & 14--15 & 1--3 \\\\ 
  Support \\% & 37\\% & 31\\% & 6\\% \\\\  
  & & & \\\\ 
  \\underline{Battlefield Performance} & & & \\\\
  "}
note <- paste0("\\multicolumn{4}{p{350pt}}{\\footnotesize NOTE: Battlefield performance indicators are derived from October 1941 declassified personnel records for the 168th (N=",x_cases[Variable=="N_SOLDIERS",Treated],") and 90th (N=",x_cases[Variable=="N_SOLDIERS",Control],") Rifle Divisions. Estimates of divisional strength are drawn from official tables of organization and measured on the eve of the Battle of Leningrad (see \\citealt[526,548]{askey_16}).} ")
xmat <- x_cases %>% data.table::copy() %>% .[Variable%in%labmat[,varz]] %>% .[,Variable:=Variable%>%match(labmat[,varz])%>%labmat[.,labz]] %>% data.table::setnames(c(" ","\\emph{168th RD}","\\emph{90th RD}","Difference"))
xmat %>% xtable::xtable(caption="\\textbf{Paired Comparison: Battle of Leningrad} (9 July--26 Oct.1941)",label="tab:newpaired")%>%xtable::print.xtable(caption.placement="top",table.placement="H",include.rownames=FALSE,add.to.row=list(pos=list(1,5,6),command=c(manual_insert,"Div. Commanders KIA & 0 & 3 & -3  \\\\ \n  ","& & & \\\\ \n")),booktabs=TRUE,sanitize.colnames.function=identity) %>% gsub("\\centering",paste0("\\begin{singlespace} \n \\centering"),.,fixed=TRUE) %>% gsub("\\end{table}",paste0("\\end{singlespace} \n \\end{table}"),.,fixed=TRUE) %>% gsub("\\begin{tabular}",paste0("\\footnotesize\\begin{tabular}"),.,fixed=TRUE) %>% gsub("\\bottomrule\n",paste0("\\bottomrule\n ",note,"\n"),.,fixed=TRUE) %>% gsub("\\midrule\n",paste0("\\midrule\n & & & \\\\ \n"),.,fixed=TRUE) %>% cat(file =  "results/tab1.tex")




##################################
# Make Figure A4.7
##################################

# FE & RE Models
model_re <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) +(1|TID)+(1|bxid))
model_fe <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(M_10km_ADM3) | TID + bxid )

# Subset matched
X_m <- X_[c(m.pairs$index.treated,m.pairs$index.control),]

# Loop over DV's
y0 <- 1; yvarz[y0]
mermods <- parallel::mclapply(seq_along(yvarz),function(y0){
  tryCatch({
  # Unweighted
  mod_xc1 <- lme4::lmer(model_re, data=X_m[,c("y","weights"):=.(get(yvarz[y0]),1)],weights=X_m$weights, control = lme4::lmerControl(optimizer ="Nelder_Mead"))
  betas <- summary(mod_xc1)["coefficients"][[1]] %>% .[grep("NKVD_",row.names(.)),]
  iccs <- icc_fun(mod_xc1) %>% .[,.(grp,icc)] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(.[1,] %>% as.character() %>% paste0("icc_",.)) %>% .[-1]
  m1 <- data.table::data.table(outcome_name=yvarz[y0],model_name="RE unweighted",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, reml = summary(mod_xc1)$AICtab) %>% dplyr::bind_cols(iccs)
  # Log-pop weights
  mod_xc2 <- lme4::lmer(model_re, data=X_m[,c("y","weights"):=.(get(yvarz[y0]),log(N_SOLDIERS+1))],weights=X_m$weights, control = lme4::lmerControl(optimizer ="Nelder_Mead"))
  betas <- summary(mod_xc2)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
  iccs <- icc_fun(mod_xc2) %>% .[,.(grp,icc)] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(.[1,] %>% as.character() %>% paste0("icc_",.)) %>% .[-1]
  m2 <- data.table::data.table(outcome_name=yvarz[y0],model_name="RE log-pop weights",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, reml = summary(mod_xc2)$AICtab) %>% dplyr::bind_cols(iccs)
  # Pop weights
  mod_xc3 <- lme4::lmer(model_re, data=X_m[,c("y","weights"):=.(get(yvarz[y0]),N_SOLDIERS)],weights=X_m$weights, control = lme4::lmerControl(optimizer ="Nelder_Mead"))
  betas <- summary(mod_xc3)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
  iccs <- icc_fun(mod_xc3) %>% .[,.(grp,icc)] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(.[1,] %>% as.character() %>% paste0("icc_",.)) %>% .[-1]
  m3 <- data.table::data.table(outcome_name=yvarz[y0],model_name="RE pop weights",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, reml = summary(mod_xc3)$AICtab) %>% dplyr::bind_cols(iccs)
  # FE unweighted
  mod_fe1 <- lfe::felm(model_fe, data = X_m[,c("Y","weights"):=.(get(yvarz[y0]),1)], weights = X_m$weights, exactDOF=T); 
  betas <- summary(mod_fe1,robust=TRUE)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
  m4 <- data.table::data.table(outcome_name=yvarz[y0],model_name="FE unweighted",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96)
  # FE log pop
  mod_fe2 <- lfe::felm(model_fe, data = X_m[,c("Y","weights"):=.(get(yvarz[y0]),log(N_SOLDIERS+1))], weights = X_m$weights, exactDOF=T); 
  betas <- summary(mod_fe2,robust=TRUE)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
  m5 <- data.table::data.table(outcome_name=yvarz[y0],model_name="FE log pop weights",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96)
  # FE pop
  mod_fe3 <- lfe::felm(model_fe, data = X_m[,c("Y","weights"):=.(get(yvarz[y0]),N_SOLDIERS)], weights = X_m$weights, exactDOF=T); 
  mod_fe3 <- lfe::felm(model_fe, data = X_m, weights = X_m$weights, exactDOF=T); 
  betas <- summary(mod_fe3,robust=TRUE)["coefficients"][[1]] %>% .[grep("NKVD",row.names(.)),]
  m6 <- data.table::data.table(outcome_name=yvarz[y0],model_name="FE pop weights",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96) 

  data.table::rbindlist(list(m1,m2,m3,m4,m5,m6),fill=T)
  },error=function(e){})
},mc.cores=min(5,parallel::detectCores())) %>% dplyr::bind_rows()

# CI plot
remods_ <- mermods %>% .[grepl("FE p",model_name),] %>% .[,outcome_name := factor(outcome_name, levels = rev(yvarz))] %>% mutate_at(names(.) %>% grep("^icc",.,value=T),as.numeric) %>% setDT() %>% .[,outcome_label := outcome_name %>% match(yvarz) %>% ylabz[.] %>% factor(., levels = rev(ylabz))] 
p <- ggplot2::ggplot(remods_,  ggplot2::aes()) +
    ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
    ggplot2::geom_pointrange(aes(x = outcome_label, y = b, ymin = l, ymax = u), lwd = 1/1.618/2, position = ggplot2::position_dodge(width = 1/2), shape = 16, fatten=1.618, fill = "WHITE")  + 
    ggplot2::theme_bw() + ggplot2::theme(aspect.ratio=1/1.618, axis.title.x=ggplot2::element_blank(), axis.title.y = ggplot2::element_blank(),title=ggplot2::element_blank()) + ggplot2::coord_flip() + ggplot2::xlab("") + ggplot2::ylab("") 
w_ <- 3.25*1.618; h_ <- 2.9
png("results/figA4_7.png", width = w_, height = h_, units = "in",res=300) 
p
dev.off()






##############################################################################
##############################################################################
##############################################################################
## Prewar Repression and Officer Purges
##
## Table A5.12: Fixed Effects Estimates, Controlling for Prewar Purges.
## Table A5.13: Random Effects Estimates, Controlling for Prewar Purges.
## Table A5.14: Predictors of NKVD Presence, including purges.
## Figure A5.8: Prewar repression, purges and battlefield losses, sequential-g estimates.
## Figure A5.9: Effect of NKVD presence on losses, by level of prewar repression and purges.
##############################################################################
##############################################################################
##############################################################################


###########################
# Make Tables A5.12, A5.13
###########################

# Purges (new)
rm(list=ls())
source("code/functions.R")

# Load and preprocess data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% preprocez()

# Subset
X_s <- X_[ECH_DIV==1,] %>% .[!duplicated(paste(UNIT_UA,bxid,TID,sep="_")),]

# Base model (w/ purges as covariate)
model_re <- as.formula(y ~ log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(purged_all_na) + (1|ARMY)+(1|TID)+(1|bxid))
model_fe <- Formula::Formula(y ~  log(NKVD_OO+1) + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) + scale(purged_all_na) | ARMY + TID + bxid )

# Estimate model
mermods <- ensemble_modz(X_s=X_s,yvarz=yvarz,model_re=model_re,model_fe=model_fe)

# Labels and captions
labz <- data.table(rn=c("b","ci","reml","icc_DIV_ARMY","icc_bxid","icc_TID","icc_Residual","hausman_p","aic","bic","N_ARMY","N_bxid","N_TID","n"),rn_lab=c("Estimate","95% CI","REML","ICC (unit)","ICC (battle)","ICC (month)","ICC (residual)","Hausman p","AIC","BIC","Army FE","Battle FE","Month FE","N"))
xlabz <- data.table::data.table(rn=c("Intercept","AGE_1941","soldiers_dist","RUS_100","POP_DENSITY","URBAN_PCT","purged_all_na"),rn_lab=c("Intercept","Avg.Age","Geo.Diversity","Pct.Russian","Pop.Density","Urbanization","PrewarPurges"))
capz_re <- "\\textbf{Random Effects Estimates, Controlling for Prewar Purges}. Observations weighted by number of discharge records per unit-month. ICC:  intraclass correlation coefficient. Null hypothesis for Hausman test: random effects model is consistent."; lbz_re <- "tab:re_purge"
capz_fe <- "\\textbf{Fixed Effects Estimates, Controlling for Prewar Purges}. Observations weighted by number of discharge records per unit-month. Null hypothesis for Hausman test: random effects model is consistent."; lbz_fe <- "tab:fe_purge"

# Make Table A5.12
fnz <- "tabA5_12.tex"
regtab <- mermods %>% .[grepl("FE pop",model_name),] %>% .[,ci := paste0("(",l%>%SUNGEO::smart_round(1),",",u%>%SUNGEO::smart_round(1),")")] %>% .[,hausman_p := hausman_p%>%(function(x){ifelse(x<0.001,"<0.001",SUNGEO::smart_round(x,3))})] %>% .[,c("b","N_ARMY","N_bxid","N_TID","n") := lapply(.SD,SUNGEO::smart_round,rnd=3),.SDcols=c("b","N_ARMY","N_bxid","N_TID","n")] %>% .[,aic := aic %>% SUNGEO::smart_round(1)]  %>% .[,.(outcome_name,b,ci,hausman_p,aic,N_ARMY,N_bxid,N_TID,n)] %>% dplyr::select(tidyselect::where(~ any(!is.na(.)))) %>% dplyr::select(tidyselect::where(~ any(.!="")))%>% .[,outcome_name := outcome_name %>% match(yvarz) %>% ylabz_short[.]] %>% t() %>% data.table::as.data.table(keep.rownames=T) %>% data.table::setnames(.[1,] %>% as.character()) %>% .[-1,] %>% .[,outcome_name := outcome_name %>% match(labz[,rn]) %>% labz[.,rn_lab]]
regtab_out <- regtab %>% xtable::xtable(caption=capz_fe,label=lbz_fe,align=paste0("lr",paste0(rep("c",ncol(regtab)-1),collapse=""))) %>% print(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(0,2),sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=paste0("results/",fnz),size="footnotesize",caption.placement="top")

# Make Table A5.13
fnz <- "tabA5_13.tex"
regtab <- mermods %>% .[grepl("RE pop",model_name),]  %>% dplyr::mutate_at(names(.) %>% grep("^icc",.,value=T),as.numeric) %>% data.table::setDT() %>% .[!outcome_name%in%c("OK_100","DEF_100","TRE_100","GMED_100")] %>% .[,ci := paste0("(",l%>%SUNGEO::smart_round(1),",",u%>%SUNGEO::smart_round(1),")")] %>% .[,hausman_p := hausman_p%>%(function(x){ifelse(x<0.001,"<0.001",SUNGEO::smart_round(x,3))})] %>% .[,c("b","icc_ARMY","icc_bxid","icc_TID","icc_Residual","N_ARMY","N_bxid","N_TID","n") := lapply(.SD,SUNGEO::smart_round,3),.SDcols=c("b","icc_ARMY","icc_bxid","icc_TID","icc_Residual","N_ARMY","N_bxid","N_TID","n")] %>% .[,reml := reml %>% SUNGEO::smart_round(1)] %>% .[,aic := aic %>% SUNGEO::smart_round(1)]  %>% .[,.(outcome_name,b,ci,reml,icc_ARMY,icc_bxid,icc_TID,icc_Residual,hausman_p,aic,N_ARMY,N_bxid,N_TID,n)] %>% dplyr::select(tidyselect::where(~ any(!is.na(.)))) %>% dplyr::select(tidyselect::where(~ any(.!="")))%>% .[,outcome_name := outcome_name %>% match(yvarz) %>% ylabz_short[.]] %>% t() %>% data.table::as.data.table(keep.rownames=T) %>% data.table::setnames(.[1,] %>% as.character()) %>% .[-1,] %>% .[,outcome_name := outcome_name %>% match(labz[,rn]) %>% labz[.,rn_lab]]
regtab_out <- regtab %>% xtable::xtable(caption=capz_re,label=lbz_re,align=paste0("lr",paste0(rep("c",ncol(regtab)-1),collapse=""))) %>% print(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(0,2),sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=paste0("results/",fnz),size="footnotesize",caption.placement="top")



###########################
# Make Table A5.14
###########################

# Purges (new)
rm(list=ls())
source("code/functions.R")

# Load and preprocess data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% preprocez()

# Subset
X_s <- X_[ECH_DIV==1,] %>% .[!duplicated(paste(UNIT_UA,bxid,TID,sep="_")),]

# NKVD Assignment Model
xxvar <- "purged_all"
mod <- lfe::felm(Formula::Formula(as.formula(paste0("log(NKVD_OO+1) ~ TID + UNIT_TYPE + RUS_100 + soldiers_dist + AGE_1941 + URBAN_PCT + POP_DENSITY + log(",xxvar,"+1) | ARMY"))), data=X_s, weights = X_s$N_SOLDIERS); summary(mod)

# Labels
betas <- summary(mod)["coefficients"][[1]]%>%data.table::as.data.table()%>%.[,rn:=row.names(summary(mod)["coefficients"][[1]])]
labz <- data.table::data.table(rn=c("TID","UNIT_TYPEART_AAD","UNIT_TYPEAVIATION","UNIT_TYPEENGINEER","UNIT_TYPEINF_RFL_ABN","RUS_100","soldiers_dist","AGE_1941","URBAN_PCT","POP_DENSITY",paste0("log(",xxvar," + 1)")),rn_lab=c("Month of war","Artillery/AAD","Aviation","Engineer","Rifle","Pct.Russian","Geo.Diversity","Avg.Age","Urbanization","Pop.Density","Pre-War Purges"))
capz_fe <- "\\textbf{Predictors of NKVD Presence}, including purges. Dependent variable is logged number of OO/SMERSH personnel assigned to each division-month. Observations weighted by number of monthly discharge records. Percentage point change calculated as $(e^{\\hat{\\beta}}-1)\\times 100$."; lbz_fe <- "tab:reg_assign_purge"

# Make Table A5.14
fnz <- "tabA5_14.tex"
options(scipen=999)
regtab <- data.table::data.table(Variable=c(as.character(unlist(do.call(rbind, apply(data.frame(betas[,rn]), 1, function(x) {rbind(x, data.frame(""))})))),"AIC","Army FE","N")) %>% .[Variable %in% labz[,rn],Variable := Variable %>% match(labz[,rn]) %>% labz[.,rn_lab]] %>% .[,`Coefficient (95\\% CI)`:=c(as.character(unlist(do.call(rbind, apply(data.frame(betas[,SUNGEO::smart_round(Estimate,2)]), 1, function(x) {rbind(x, data.frame(""))})))),stats::AIC(mod)%>%SUNGEO::smart_round(0),as.integer(mod$fe %>% sapply(function(x){length(levels(x))})),as.integer(mod["N"]))]%>%.[`Coefficient (95\\% CI)`%in%"",`Coefficient (95\\% CI)`:=betas[,paste0("(",SUNGEO::smart_round(Estimate-1.96*`Std. Error`,2),",",SUNGEO::smart_round(Estimate+1.96*`Std. Error`,2),")")]]%>% .[,`Pct.Change`:= c(as.character(unlist(do.call(rbind, apply(data.frame(SUNGEO::smart_round((summary(mod)["coefficients"][[1]][,"Estimate"]%>%exp()-1)*100,1)), 1, function(x) {rbind(x, data.frame(""))})))),"","","")]
regtab_out <- regtab %>% xtable::xtable(caption=capz_fe,label=lbz_fe,align=paste0("lr",paste0(rep("c",ncol(regtab)-1),collapse=""))) %>% print(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(-1,0,nrow(betas)*2,nrow(betas)*2+3),sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=paste0("results/",fnz),caption.placement="top",sanitize.text.function=identity)



###########################
# Make Figure A5.8
###########################

rm(list=ls())
source("code/functions.R")

# Load data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% preprocez()

# Loop over prewar variables
xxvarz <- c("M_10km_ADM3","purged_all")
# xxvar <- xxvarz[2]
for(xxvar in xxvarz){

  # New covariates
  X_s <- X_ %>% data.table::copy() %>% .[,high_pwr:=1*(get(xxvar)>median(get(xxvar),na.rm=TRUE))] %>% .[,lNKVD_OO := log(NKVD_OO+1)] %>% .[,IX:=get(xxvar)*NKVD_OO] %>% .[,lIX:=log(get(xxvar)+1)*log(NKVD_OO+1)]

  # Squential G model (demediation function from ABS2016)
  yvarz <- c("KIA_100","WIA_100","MIA_100","POW_100","DES_100","PUN_100","WMED_100")
  xvar_med <- c("lNKVD_OO","lIX")
  xvar_post <- c("AGE_1941","soldiers_dist","RUS_100","URBAN_PCT","POP_DENSITY")
  model_fe_naive <- Formula::Formula(as.formula(paste0("c(KIA_100,WIA_100,MIA_100,POW_100,DES_100,PUN_100,WMED_100) ~ scale(",paste0(xvar_post,collapse=")+scale("),") + log(",xxvar,"+1) | ARMY + TID + bxid ")))
  model_fe_base <- Formula::Formula(as.formula(paste0("c(KIA_100,WIA_100,MIA_100,POW_100,DES_100,PUN_100,WMED_100) ~  log(",xxvar,"+1) | ARMY + TID + bxid")))
  model_fe_full <- Formula::Formula(as.formula(paste0("c(KIA_100,WIA_100,MIA_100,POW_100,DES_100,PUN_100,WMED_100) ~ ",paste0(xvar_med,collapse="+"),"+ scale(",paste0(xvar_post,collapse=")+scale("),") + log(",xxvar,"+1) | ARMY + TID + bxid ")))
  model_fe_seqg <- Formula::Formula(as.formula(paste0("c(KIA_100_dm,WIA_100_dm,MIA_100_dm,POW_100_dm,DES_100_dm,PUN_100_dm,WMED_100_dm) ~  log(",xxvar,"+1) | ARMY + TID + bxid ")))
  mod_naive <- fixest::feols(model_fe_naive,data=X_s,vcov="cluster"); fixest::etable(mod_naive)
  mod_base <- fixest::feols(model_fe_base,data=X_s,vcov="cluster"); fixest::etable(mod_base)
  mod_full <- fixest::feols(model_fe_full,data=X_s,vcov="cluster"); fixest::etable(mod_full)
  beta_post <- coefficients(mod_full) %>% .[,grep(paste0(xvar_post,collapse="|"),names(.))]
  beta_med <- coefficients(mod_full) %>% .[,grep(paste0(xvar_med,collapse="|"),names(.))]
  X_s %>% .[,paste0(yvarz,"_post"):=lapply(seq_along(yvarz),function(y0){apply(.SD*t(beta_med[y0,]),1,sum)}),.SDcols=xvar_med] %>% .[,paste0(yvarz,"_dm"):=lapply(yvarz,function(y0){get(y0)-get(paste0(y0,"_post"))})]
  mod_seqg <- fixest::feols(model_fe_seqg,data=X_s,vcov="cluster"); fixest::etable(mod_seqg)

  # Bootstrap the SEs
  boots <- 1000
  set.seed(1); mod_seqg_boot <- lapply(1:boots,function(b){print(paste0("Bootstrapping SEs ",b,"/",boots))
    tryCatch({
      suppressMessages({
        X_star <- X_s[sample(1:.N,replace = TRUE),]
        mod_full_ <- fixest::feols(model_fe_full,data=X_star,weights=~N_SOLDIERS,vcov="cluster")
        beta_med_ <- coefficients(mod_full_) %>% .[,grep(paste0(xvar_med,collapse="|"),names(.))]
        X_star %>% .[,paste0(yvarz,"_post"):=lapply(seq_along(yvarz),function(y0){apply(.SD*t(beta_med_[y0,]),1,sum)}),.SDcols=xvar_med] %>% .[,paste0(yvarz,"_dm"):=lapply(yvarz,function(y0){get(y0)-get(paste0(y0,"_post"))})]
        mod_seqg_ <- fixest::feols(model_fe_seqg,data=X_star,weights=~N_SOLDIERS,vcov="cluster")
        if("matrix"%in%class(sapply(mod_seqg_,coefficients))){
          betas_boot <- sapply(mod_seqg_,coefficients)%>%.[grep(xxvar,rownames(.)),] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(paste0(yvarz,"_boot"))
        } else {
          betas_boot <- sapply(mod_seqg_,coefficients)%>%.[grep(xxvar,names(.))] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(paste0(yvarz,"_boot"))
        }
        betas_boot
      })
    # Close error catch
    },error=function(e){NULL})
  })%>%data.table::rbindlist()
  if("matrix"%in%class(sapply(mod_seqg,coefficients))){
    betas_mod <- sapply(mod_seqg,coefficients)%>%.[grep(xxvar,rownames(.)),] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(paste0(yvarz,"_est"))
  } else {
    betas_mod <- sapply(mod_seqg,coefficients)%>%.[grep(xxvar,names(.))] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(paste0(yvarz,"_est"))
  }
  se_boot <- apply(mod_seqg_boot,2,sd)
  z.stat <- (betas_mod/se_boot)
  p.2 <- exp(-0.717*abs(z.stat) - 0.416*z.stat^2)
  beta_seqg_boot <- data.table::data.table(outcome=names(betas_mod),beta_seqg=t(betas_mod)[,1],se_boot=se_boot,z_boot=t(z.stat)[,1],p_boot=t(p.2)[,1])
  beta_seqg_boot

  # Compare to OLS
  if("matrix"%in%class(sapply(mod_naive,coefficients))){
    betas_base <- sapply(mod_naive,coefficients)%>%.[grep(xxvar,rownames(.)),] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(paste0(yvarz,"_est"))
  } else {
    betas_base <- sapply(mod_naive,coefficients)%>%.[grep(xxvar,names(.))] %>% t() %>% data.table::as.data.table() %>% data.table::setnames(paste0(yvarz,"_est"))
  }
  se_base <- sapply(mod_naive,function(x){sqrt(diag(stats::vcov(x)))})%>%.[grep(xxvar,row.names(.)),]
  z.stat <- (betas_base/se_base)
  p.2 <- exp(-0.717*abs(z.stat) - 0.416*z.stat^2)
  beta_base <- data.table::data.table(outcome=names(betas_base),beta_ols=t(betas_base)[,1],se_ols=se_base,z_base=t(z.stat)[,1],p_base=t(p.2)[,1])
  beta_base

  # Make Figure A5.8
  beta_mat <- list(
    beta_base %>% data.table::copy() %>% data.table::setnames(stringr::str_split(names(.),"_")%>%sapply("[",1)) %>% .[,grp:="ATE"] %>% .[,outcome:=gsub("_est","",outcome)],
    beta_seqg_boot %>% data.table::copy() %>% data.table::setnames(stringr::str_split(names(.),"_")%>%sapply("[",1)) %>% .[,grp:="ACDE"] %>% .[,outcome:=gsub("_est","",outcome)]
  ) %>% data.table::rbindlist() %>% .[,l:=beta-1.96*se] %>% .[,u:=beta+1.96*se] %>% .[,outcome_label:=outcome%>%match(yvarz)%>%ylabz[.]%>%factor(levels=ylabz%>%rev())] %>% .[,grp:=factor(grp,levels=c("ACDE","ATE"))] 
  p <- ggplot2::ggplot(beta_mat,  ggplot2::aes(group=grp)) +
      ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
      ggplot2::geom_point(ggplot2::aes(x = outcome_label, y = beta, shape=grp), position = ggplot2::position_dodge(width = 1/2), fill = "WHITE")  + 
      ggplot2::geom_linerange(ggplot2::aes(x = outcome_label, ymin = l, ymax = u), lwd = 1/1.618/2, position = ggplot2::position_dodge(width = 1/2), key_glyph = "path")  + 
      ggplot2::coord_flip() + 
      ggplot2::xlab("") + ggplot2::ylab("") +
      ggplot2::labs(shape = "Estimand:") +
      ggplot2::theme_bw() + 
      ggplot2::guides(shape = ggplot2::guide_legend(reverse=TRUE)) +
      ggplot2::theme(aspect.ratio=1/1.618, legend.position = c(.4,-.1618), legend.direction="horizontal", legend.title = ggplot2::element_text(size = 8), legend.text = ggplot2::element_text(size=8), axis.title.y = ggplot2::element_blank()) 
  w_ <- 3.25*1.618; h_ <- 2.9
  png(paste0("results/figA5_8_",letters[which(xxvarz==xxvar)],".png"), width = w_, height = h_, units = "in",res=300)
  print(p)
  dev.off()
}


###########################
# Make Figure A5.9
###########################

rm(list=ls())
source("code/functions.R")

# Load data
X_ <- readRDS("data/data_divisionlevel.RDS") %>% preprocez()

# Loop over prewar variables
xxvarz <- c("M_10km_ADM3","purged_all_na")
xxlabz <- c("Pre-war repression:","Pre-war purges:")
for(xxvar in xxvarz){

  # New covariates
  X_s <- X_ %>% .[,high_pwr:=1*(get(xxvar)>median(get(xxvar),na.rm=TRUE))] 

  # Interaction models
  model_fe_ix <- Formula::Formula(c(KIA_100,WIA_100,MIA_100,POW_100,DES_100,PUN_100,WMED_100) ~  log(NKVD_OO+1)*high_pwr + scale(AGE_1941) + scale(soldiers_dist) + scale(RUS_100) + scale(URBAN_PCT) + scale(POP_DENSITY) | ARMY + TID + bxid )
  mod <- fixest::feols(model_fe_ix,data=X_s,weights=~N_SOLDIERS,vcov="cluster"); fixest::etable(mod)

  # Extract CIs
  ixmods <- lapply(1:length(mod),function(m0,x0="NKVD_OO \\+ 1\\)$",x1="NKVD_OO \\+ 1\\)\\:high_pwr"){
    list(
      data.table::data.table(
        outcome_name=model_fe_ix[[2]]%>%as.character()%>%.[-1]%>%.[m0],
        grp="Low (below median)",
        b=mod[[m0]][["coeftable"]]%>%.[grep(x0,row.names(.)),1],
        l=mod[[m0]][["coeftable"]]%>%.[grep(x0,row.names(.)),1]-1.96*mod[[m0]][["coeftable"]]%>%.[grep(x0,row.names(.)),2],
        u=mod[[m0]][["coeftable"]]%>%.[grep(x0,row.names(.)),1]+1.96*mod[[m0]][["coeftable"]]%>%.[grep(x0,row.names(.)),2]
        ),
      data.table::data.table(
        outcome_name=model_fe_ix[[2]]%>%as.character()%>%.[-1]%>%.[m0],
        grp="High (above median)",
        b=mod[[m0]][["coeftable"]]%>%.[grep(x0,row.names(.)),1]+mod[[m0]][["coeftable"]]%>%.[grep(x1,row.names(.)),1],
        l=mod[[m0]][["coeftable"]]%>%.[grep(x0,row.names(.)),1]+mod[[m0]][["coeftable"]]%>%.[grep(x1,row.names(.)),1]-1.96*ix_se(mod[[m0]][["cov.iid"]],x0,x1),
        u=mod[[m0]][["coeftable"]]%>%.[grep(x0,row.names(.)),1]+mod[[m0]][["coeftable"]]%>%.[grep(x1,row.names(.)),1]+1.96*ix_se(mod[[m0]][["cov.iid"]],x0,x1)
        )
      )%>%data.table::rbindlist()
  })%>%data.table::rbindlist() %>% .[,outcome_name:=factor(outcome_name,levels=yvarz%>%rev())] %>% .[,outcome_label:=outcome_name%>%match(yvarz)%>%ylabz[.]%>%factor(levels=ylabz%>%rev())] %>% .[,grp:=factor(grp,levels=c("Low (below median)","High (above median)"))] 
  ixmods

  # Make Figure A5.9
  p <- ggplot2::ggplot(ixmods,  ggplot2::aes(group=grp)) +
      ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
      ggplot2::geom_point(ggplot2::aes(x = outcome_label, y = b, shape=grp), position = ggplot2::position_dodge(width = 1/2), fill = "WHITE")  + 
      ggplot2::geom_linerange(ggplot2::aes(x = outcome_label, ymin = l, ymax = u), lwd = 1/1.618/2, position = ggplot2::position_dodge(width = 1/2), key_glyph = "path")  + 
      ggplot2::coord_flip() + 
      ggplot2::xlab("") + ggplot2::ylab("") +
      ggplot2::labs(shape = xxlabz[which(xxvarz==xxvar)]) +
      ggplot2::theme_bw() + 
      ggplot2::guides(shape = ggplot2::guide_legend(reverse=TRUE)) +
      ggplot2::theme(aspect.ratio=1/1.618, legend.position = c(.4,-.1618), legend.direction="horizontal", legend.title = ggplot2::element_text(size = 8), legend.text = ggplot2::element_text(size=8), axis.title.y = ggplot2::element_blank()) 
  w_ <- 3.25*1.618; h_ <- 2.9
  png(paste0("results/figA5_9_",letters[which(xxvarz==xxvar)],".png"), width = w_, height = h_, units = "in",res=300)
  print(p)
  dev.off()
}




##############################################################################
##############################################################################
##############################################################################
## Cross-National Battle-Level Evidence
##
## Figure 5: Impact of Fratricidal Coercion across 526 Battles, 1939-2011.
## Table A6.15: Summary Statistics for Cross-National Battle-Level Data
## Figure A6.10: Cross-National Battle-Level Robustness Tests
## Figure A6.11: Cross-National Battle-Level Mixed Effects Specification
##############################################################################
##############################################################################
##############################################################################

print("Running: code chunk 3")
# Clear workspace, source functions
rm(list=ls())
source("code/functions.R")

##################################
# Load and prepare data
##################################

# Load data
X <- readRDS("data/data_battlelevel.RDS")

# Subset land battles
X <- X %>% .[sea_battle==0&air_battle==0,]

# Weight by battle size
X_ <- X %>% .[,weight:=battle_size] %>% .[,medw := median(weight,na.rm=T)] %>% .[!is.na(weight)]


##################################
# Summary stats table
##################################

# Labels
labz <- data.table::data.table(
  varname=c("kia_mean","wia_mean","mia_mean","pow_mean","ldr_captured_killed","prop_cas3","ler3","blocking","initiator","recruit","rel_cinc","force_ratio","deploy_dist","moredemoc","geneva","opp_geneva","majorbattle","ww2","start_year","end_year","winter","spring","summer","fall"),
  varlabz=c("Killed in Action","Wounded in Action","Missing in Action","Prisoners of War","Commander Killed or Captured","Proportion of Force Lost","Loss-Exchange Ratio","Blocking Units","Initiator","Conscript Army","CINC Ratio","Force Ratio","Deployment Distance","More Democratic","Geneva","Opponent Geneva","Major Battle","WWII","Start Year","End Year","Winter","Spring","Summer","Fall")
  )

# Make Table A6.15
sumstat_tab <- X[,lapply(.SD,function(x){data.table::data.table(
  Min=min(x,na.rm=T)%>%SUNGEO::smart_round(3),
  Max=max(x,na.rm=T)%>%SUNGEO::smart_round(3) %>% format(big.mark = ","),
  Median=median(x,na.rm=T)%>%SUNGEO::smart_round(3),
  Mean=mean(x,na.rm=T)%>%SUNGEO::smart_round(3),
  SD=sd(x,na.rm=T)%>%SUNGEO::smart_round(3),
  N=length(x[!is.na(x)])
  )})%>%data.table::rbindlist() %>% .[,Variable:=labz[,varlabz]]%>%dplyr::select(Variable,dplyr::everything()),.SDcols=labz[,varname]]
xtable::xtable(sumstat_tab,caption="\\textbf{Summary Statistics for Cross-National Battle-Level Data}",label="tab:sum_crossnat") %>% xtable::print.xtable(legend.position="top", format.args = list(big.mark = ","), include.rownames = FALSE,booktabs=TRUE,size="small",file="results/tabA6_15.tex")


##################################
# Model estimation
##################################

# Labels
yvarz <- c("lkia_mean","lwia_mean","lmia_mean","lpow_mean","prop_cas3","logler3")
ylabz <- c("Killed in Action","Wounded in Action","Missing in Action","Prisoners of War","Proportion of Force Lost","Loss Exchange Ratio")

# Model specification
xvar <- "blocking"
model_base <- paste0("y ~ ",xvar," + initiator + recruit + rel_cinc + log_force_ratio + ldeploy_dist + moredemoc + geneva + opp_geneva + majorbattle + ww2 + winter + spring + summer + decade  | 0 | 0 | code + conflict") %>% as.formula() %>% Formula::Formula()
model_fe <- paste0("y ~ ",xvar," + initiator + recruit + rel_cinc + log_force_ratio + ldeploy_dist + moredemoc + geneva + opp_geneva + majorbattle + ww2 + winter + spring + summer + decade | conflict | 0 | code + conflict") %>% as.formula() %>% Formula::Formula()
model_re <- paste0("y ~ ",xvar," + initiator + recruit + rel_cinc + log_force_ratio + ldeploy_dist + moredemoc + geneva + opp_geneva + majorbattle + ww2 + winter + spring + summer + decade + (1|conflict)") %>% as.formula() 

# Estimate models
modz <- parallel::mclapply(1:length(yvarz),function(y0){#print(y0)
    suppressWarnings({
      suppressMessages({
        # Rescale y
        X_ <- X_ %>% .[,y := get(yvarz[y0])]
        if(grepl("^ldr|prop",yvarz[y0])){X_ <- X_ %>% .[,y := scale(y)]}
        # OLS
        mod_base <- lfe::felm(model_base, data = X_, exactDOF=T, weights=X_[,weight]); summary(mod_base)
        betas <- summary(mod_base)["coefficients"][[1]] %>% .[grep(xvar,row.names(.)),]
        m0 <- data.table(outcome_label=ylabz[y0],outcome_name=yvarz[y0],model_name="OLS",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68, aic = stats::AIC(mod_base))
        # RE
        mod_re <- lme4::lmer(model_re, data=X_, control = lmerControl(optimizer ="Nelder_Mead"), weights=X_[,weight])
        mod_rre <- robustlmm::rlmer(model_re, data=X_, control = lmerControl(optimizer ="Nelder_Mead"))
        betas <- summary(mod_rre)["coefficients"][[1]] %>% .[grep(xvar,row.names(.)),]
        iccs <- icc_fun(mod_re) %>% .[,.(grp,icc)] %>% t() %>% as.data.table() %>% setnames(.[1,] %>% as.character() %>% paste0("icc_",.)) %>% .[-1]
        fitz <- summary(mod_re)$optinfo$conv$lme4$messages %>% paste0(collapse="; ") %>% (function(.){ifelse(nchar(.)>0,1,0)})
        m1 <- data.table(outcome_label=ylabz[y0],outcome_name=yvarz[y0],model_name="RE",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68, aic = stats::AIC(mod_re), reml = summary(mod_re)$AICtab,msg=fitz) %>% bind_cols(iccs)
        # FE
        mod_fe <- lfe::felm(model_fe, data = X_, exactDOF=T, weights=X_[,weight]); summary(mod_fe)
        betas <- summary(mod_fe)["coefficients"][[1]] %>% .[grep(xvar,row.names(.)),]
        m2 <- data.table(outcome_label=ylabz[y0],outcome_name=yvarz[y0],model_name="FE",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68, aic=stats::AIC(mod_fe))
        ht <- phtest_felm(mod_re,mod_fe)
        m2[,hausman_p := ht$p.value]
        m1[,hausman_p := ht$p.value]
      })
    })
    data.table::rbindlist(list(m0,m1,m2),fill=T)
},mc.cores=parallel::detectCores()-1) %>% data.table::rbindlist()
modz[hausman_p<.05&grepl("FE",model_name),.(outcome_name,model_name,b,l,u)]
modz[hausman_p>.05&grepl("RE",model_name),.(outcome_name,model_name,b,l,u)]


##################################
# Make Figures 5, A6.11
##################################

# Make Figure 5
remods_ <- modz %>% .[grepl("FE",model_name),] %>% .[,outcome_name := factor(outcome_name, levels = rev(yvarz))] %>% mutate_at(names(.) %>% grep("^icc",.,value=T),as.numeric) %>% setDT() %>% .[,outcome_label := outcome_name %>% match(yvarz) %>% ylabz[.] %>% factor(., levels = rev(ylabz))] 
p <- ggplot2::ggplot(remods_,  ggplot2::aes()) +
    ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
    ggplot2::geom_pointrange(aes(x = outcome_label, y = b, ymin = l, ymax = u), lwd = 1/1.618/2, position = ggplot2::position_dodge(width = 1/2), shape = 16, fatten=1.618, fill = "WHITE")  + 
    ggplot2::theme_bw() + ggplot2::theme(aspect.ratio=1/1.618, axis.title.x=ggplot2::element_blank(), axis.title.y = ggplot2::element_blank(),title=ggplot2::element_blank()) + ggplot2::coord_flip() + ggplot2::xlab("") + ggplot2::ylab("") 
w_ <- 3.25*1.618; h_ <- 2.9
png("results/fig5.png", width = w_, height = h_, units = "in",res=300)  
p
dev.off()

# Make Figure A6.11
remods_ <- modz %>% .[grepl("RE",model_name),] %>% .[,outcome_name := factor(outcome_name, levels = rev(yvarz))] %>% mutate_at(names(.) %>% grep("^icc",.,value=T),as.numeric) %>% setDT() %>% .[,outcome_label := outcome_name %>% match(yvarz) %>% ylabz[.] %>% factor(., levels = rev(ylabz))] 
p <- ggplot2::ggplot(remods_,  ggplot2::aes()) +
    ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
    ggplot2::geom_pointrange(aes(x = outcome_label, y = b, ymin = l, ymax = u), lwd = 1/1.618/2, position = ggplot2::position_dodge(width = 1/2), shape = 16, fatten=1.618, fill = "WHITE")  + 
    ggplot2::theme_bw() + ggplot2::theme(aspect.ratio=1/1.618, axis.title.x=ggplot2::element_blank(), axis.title.y = ggplot2::element_blank(),title=ggplot2::element_blank()) + ggplot2::coord_flip() + ggplot2::xlab("") + ggplot2::ylab("") 
w_ <- 3.25*1.618; h_ <- 2.9
png("results/figA6_11.png", width = w_, height = h_, units = "in",res=300)  
p
dev.off()


##################################
# Robustness: dropping USSR & WWII
##################################

# Re-estimate models
modz <- parallel::mclapply(1:length(yvarz),function(y0){
    # Base
    X_ <- X %>% .[,y := get(yvarz[y0])] %>% .[,weight:=battle_size] %>% .[,medw := median(weight,na.rm=T)] %>% .[!is.na(weight)]
    if(grepl("^ldr",yvarz[y0])){X_ %>% .[,y := scale(y)]}
    mod_fe <- lfe::felm(model_fe, data = X_, exactDOF=T, weights=X_[,weight]); summary(mod_fe)
    betas <- summary(mod_fe)["coefficients"][[1]] %>% .[grep(xvar,row.names(.)),]
    m0 <- data.table(outcome_label=ylabz[y0],outcome_name=yvarz[y0],model_name="Original",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68, aic = stats::AIC(mod_fe))
    # No USSR
    X_ <- X[name!="USSR",] %>% .[,y := get(yvarz[y0])] %>% .[,weight:=battle_size] %>% .[,medw := median(weight,na.rm=T)] %>% .[!is.na(weight)]
    if(grepl("^ldr",yvarz[y0])){X_ %>% .[,y := scale(y)]}
    mod_fe <- lfe::felm(model_fe, data = X_, exactDOF=T, weights=X_[,weight]); summary(mod_fe)
    betas <- summary(mod_fe)["coefficients"][[1]] %>% .[grep(xvar,row.names(.)),]
    m1 <- data.table(outcome_label=ylabz[y0],outcome_name=yvarz[y0],model_name="No USSR",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68, aic = stats::AIC(mod_fe))
    # No WWII
    X_ <- X[ww2!=1,] %>% .[,y := get(yvarz[y0])] %>% .[,weight:=battle_size] %>% .[,medw := median(weight,na.rm=T)] %>% .[!is.na(weight)]
    if(grepl("^ldr",yvarz[y0])){X_ %>% .[,y := scale(y)]}
    mod_fe <- lfe::felm(model_fe, data = X_, exactDOF=T, weights=X_[,weight]); summary(mod_fe)
    betas <- summary(mod_fe)["coefficients"][[1]] %>% .[grep(xvar,row.names(.)),]
    m2 <- data.table(outcome_label=ylabz[y0],outcome_name=yvarz[y0],model_name="No WWII",b=betas[1],l=betas[1]-betas[2]*1.96,u=betas[1]+betas[2]*1.96, l90=betas[1]-betas[2]*1.68,u90=betas[1]+betas[2]*1.68, aic = stats::AIC(mod_fe))
    data.table::rbindlist(list(m0,m1,m2),fill=T)
},mc.cores=parallel::detectCores()-1) %>% data.table::rbindlist() 

# Make Figure A6.10
remods_ <- modz  %>% .[,outcome_name := factor(outcome_name, levels = rev(yvarz))] %>% mutate_at(names(.) %>% grep("^icc",.,value=T),as.numeric) %>% setDT() %>% .[,outcome_label := outcome_name %>% match(yvarz) %>% ylabz[.] %>% factor(., levels = rev(ylabz))] 
p <- ggplot2::ggplot(remods_,  ggplot2::aes()) +
    ggplot2::geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
    ggplot2::geom_pointrange(aes(x=outcome_label, y=b, ymin=l, ymax=u, shape=model_name), lwd=1/1.618/2, position = ggplot2::position_dodge(width = .6), fatten=1.618, fill = "WHITE",key_glyph = "point")  +
    ggplot2::theme_bw() + ggplot2::theme(aspect.ratio=1/1.618, legend.position = "bottom", legend.direction = "horizontal",legend.title = ggplot2::element_blank(),legend.text=ggplot2::element_text(size=10),legend.margin=ggplot2::margin(-10, 0, 0, 0), axis.title.x=ggplot2::element_blank(), axis.title.y = ggplot2::element_blank(),title=ggplot2::element_blank(),axis.text=element_text(size=10)) + ggplot2::coord_flip() + ggplot2::xlab("") + ggplot2::ylab("") +
    ggplot2::scale_shape_manual(values=0:2,guide=ggplot2::guide_legend(reverse=T,override.aes = list(size = 3))) 
w_ <- 4.25*1.618; h_ <- 4.25
png("results/figA6_10.png", width = w_, height = h_, units = "in",res=300) 
p
dev.off()


##############################################################################
##############################################################################
##############################################################################
## Cross-National War-Level Evidence
##
## Table A6.16: Battle losses and strategic-level outcomes.
## Table A6.17: Blocking units and strategic-level outcomes.
##############################################################################
##############################################################################
##############################################################################

print("Running: code chunk 4")
# Clear workspace, source functions
rm(list=ls())
source("code/functions.R")

##################################
# Load and prepare data
##################################

# Load data
X <- readRDS("data/data_warlevel.RDS")

##################################
# Validation exercise
##################################

# Model specification
mod_mean <- Formula::as.Formula(as.formula(c(outcome_win,outcome_lose,outcome_other) ~ log(kia_mean+1) + log(wia_mean+1) + log(mia_mean+1) + log(pow_mean+1) + cinc + log(deploy_dist+1) + polity2_new + opp_geneva  | name + decade))
regx <- "kwia_mean\\b|kia_mean\\b|\\bwia_mean\\b|\\bmia_mean\\b|\\bpow_mean\\b|\\bler\\d{1}\\b|\\bprop_cas\\d{1}\\b"

# Model estimation
suppressWarnings({
  suppressMessages({
    mod_spec <- mod_mean
    mod <- fixest::feols(mod_spec, data=X, cluster=~name+conflict); fixest::etable(mod)
    mod1 <- betamat(mod,regx=regx,model_name="Model 1")
    mod_prop <- Formula::as.Formula(as.formula(c(outcome_win,outcome_lose,outcome_other) ~ prop_cas3 + cinc + log(deploy_dist+1) + polity2_new + opp_geneva | name + decade))
    mod_spec <- mod_prop
    mod <- fixest::feols(mod_spec, data=X, cluster=~name + conflict); fixest::etable(mod)
    mod2 <- betamat(mod,regx=regx,model_name="Model 2")
    mod_ler <- Formula::as.Formula(as.formula(c(outcome_win,outcome_lose,outcome_other) ~ log(ler2+1) + cinc + log(deploy_dist+1) + polity2_new + opp_geneva | name + decade))
    mod_spec <- mod_ler
    mod <- fixest::feols(mod_spec, data=X, cluster=~name + conflict); fixest::etable(mod)
    mod3 <- betamat(mod,regx=regx,model_name="Model 3")
    mod_out <- list(mod1,mod2,mod3)%>%data.table::rbindlist()
  })
})

# Make Table A6.16
yvarz <- c("outcome_win","outcome_lose")
ylabz <- c("Victory","Defeat")
xvarz <- c("kia_mean","wia_mean","mia_mean","pow_mean","prop_cas3","ler2")
xlabz <- c("Killed in Action","Wounded in Action","Missing in Action","Prisoners of War","Proportion of Force Lost","Loss Exchange Ratio")
labz <- data.table(rn=c("outcome_name","b","ci",paste0("b_",xvarz),paste0("ci_",xvarz),"r2","aic","bic","name_fe","decade_fe","n"),rn_lab=c("Outcome","Estimate","95% CI",xlabz,rep("",length(xvarz)),"R-squared","AIC","BIC","Country FE","Decade FE","N"))
capz_fe <- paste0("\\textbf{Battle losses and strategic-level outcomes}. Linear probability model coefficient estimates reported, with 95\\% confidence intervals in parentheses. Covariates KIA, WIA, MIA, POW and LER are on a logarithnic scale. Robust stardard errors are clustered by country and conflict. Coefficient estimates for other covariates omitted.\\\\") 
lbz_fe <- paste0("tab:fe_outcome1")
fnz <- "tabA6_16.tex"
suppressWarnings({
  suppressMessages({
    ytabz <- lapply(yvarz,function(y0){#print(y0)
      regtab <- mod_out %>% data.table::copy() %>% .[outcome_name%in%y0] %>% .[,ci := paste0("(",l%>%SUNGEO::smart_round(1),",",u%>%SUNGEO::smart_round(1),")")]  %>% .[,b := SUNGEO::smart_round(b,3)] %>% .[,r2 := r2 %>% SUNGEO::smart_round(2)] %>% .[,aic := aic %>% SUNGEO::smart_round(1)]
      regtab_x_ <- nrow(regtab)
      regtab <- lapply(1:3,function(m0){
        regtab_x <- lapply(1:length(regtab[grepl(m0,model_name),x]),function(i0){
          estz <- regtab[grepl(m0,model_name)&x%in%regtab[grepl(m0,model_name),x[i0]],.(b,ci)] %>% data.table::setnames(paste0(c("b_","ci_"),regtab[grepl(m0,model_name),x[i0]]))
          estz
        }) %>% dplyr::bind_cols()
        dplyr::bind_cols(regtab[grepl(m0,model_name)],regtab_x)
      }) %>% data.table::rbindlist(fill=TRUE) %>% .[,.SD,.SDcols=c("model_name","outcome_name",grep("^b_|^ci_",names(.),value=TRUE),"name_fe","decade_fe","r2","aic","n")] %>% unique() %>% dplyr::select(tidyselect::where(~ any(!is.na(.)))) %>% dplyr::select(tidyselect::where(~ any(.!="")))%>% .[,outcome_name := outcome_name %>% match(yvarz) %>% ylabz[.]] %>% t() %>% data.table::as.data.table(keep.rownames=T) %>% data.table::setnames(.[1,] %>% as.character()) %>% .[-1,]  %>% .[,model_name := model_name %>% match(labz[,rn]) %>% labz[.,rn_lab]] %>% data.table::setnames("model_name","")
      regtab
    }) %>% dplyr::bind_cols() %>% .[,-5] %>% data.table::setnames(c("",paste0("Model ",1:6)))
  })
})
ytabz%>% xtable::xtable(caption=capz_fe,label=lbz_fe,align=paste0("lr",paste0(rep("c",ncol(ytabz)-1),collapse=""))) %>% print(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(0,1,length(xvarz)+7),sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=paste0("results/",fnz),size="footnotesize",caption.placement="top")


##################################
# War outcomes
##################################

# Model specification
mod_b <- Formula::as.Formula(as.formula(c(outcome_win,outcome_lose,outcome_other) ~ blocking + cinc + log(deploy_dist+1) + polity2_new + opp_geneva  | name + decade))
regx <- "kwia_mean\\b|kia_mean\\b|\\bwia_mean\\b|\\bmia_mean\\b|\\bpow_mean\\b|\\bler\\d{1}\\b|\\bprop_cas\\d{1}\\b|blocking"

# Model estimation
mod_spec <- mod_b
mod <- fixest::feols(mod_spec, data=X, cluster=~name+conflict); fixest::etable(mod)
mod_out <- betamat(mod,regx=regx) %>% .[,model_name := paste0("Model ",1:.N)]

# Make Table A6.17
xvarz <- c("blocking")
xlabz <- c("Blocking Units")
yvarz <- c("outcome_win","outcome_lose","outcome_other")
ylabz <- c("Victory","Defeat","Other")
labz <- data.table(rn=c("outcome_name","b","ci",paste0("b_",xvarz),paste0("ci_",xvarz),"r2","aic","bic","name_fe","decade_fe","n"),rn_lab=c("Outcome","Estimate","95% CI",xlabz,rep("",length(xvarz)),"R-squared","AIC","BIC","Country FE","Decade FE","N"))
capz_fe <- paste0("\\textbf{Blocking units and strategic-level outcomes}. Linear probability model coefficient estimates reported, with 95\\% confidence intervals in parentheses. Robust stardard errors clustered by country and conflict. Coefficient estimates for other covariates omitted.\\\\"); lbz_fe <- paste0("tab:fe_b")
fnz <- paste0("tabA6_17.tex")
regtab <- mod_out %>% data.table::copy() %>% .[,ci := paste0("(",l%>%SUNGEO::smart_round(1),",",u%>%SUNGEO::smart_round(1),")")]  %>% .[,b := SUNGEO::smart_round(b,3)] %>% .[,r2 := r2 %>% SUNGEO::smart_round(2)] %>% .[,aic := aic %>% SUNGEO::smart_round(1)]
regtab <- lapply(1:3,function(m0){
  regtab_x <- lapply(1:length(regtab[grepl(m0,model_name),x]),function(i0){
    estz <- regtab[grepl(m0,model_name)&x%in%regtab[grepl(m0,model_name),x[i0]],.(b,ci)] %>% data.table::setnames(paste0(c("b_","ci_"),regtab[grepl(m0,model_name),x[i0]]))
    estz
  }) %>% dplyr::bind_cols()
  dplyr::bind_cols(regtab[grepl(m0,model_name)],regtab_x)
}) %>% data.table::rbindlist(fill=TRUE) %>% .[,.SD,.SDcols=c("model_name","outcome_name",grep("^b_|^ci_",names(.),value=TRUE),"name_fe","decade_fe","r2","aic","n")] %>% unique() %>% dplyr::select(tidyselect::where(~ any(!is.na(.)))) %>% dplyr::select(tidyselect::where(~ any(.!="")))%>% .[,outcome_name := outcome_name %>% match(yvarz) %>% ylabz[.]] %>% t() %>% data.table::as.data.table(keep.rownames=T) %>% data.table::setnames(.[1,] %>% as.character()) %>% .[-1,]  %>% .[,model_name := model_name %>% match(labz[,rn]) %>% labz[.,rn_lab]] %>% data.table::setnames("model_name","")
regtab%>% xtable::xtable(caption=capz_fe,label=lbz_fe,align=paste0("lr",paste0(rep("c",ncol(regtab)-1),collapse=""))) %>% print(include.rownames=FALSE, booktabs=TRUE,math.style.negative=TRUE,hline.after=c(-1,1,2*length(xvarz)+1),sanitize.colnames.function=function(x){gsub("outcome_name","Outcome",x)},file=paste0("results/",fnz),size="footnotesize",caption.placement="top")



##############################################################################
##############################################################################

# Print upon finishing
print("**** Finished run2_main.R ****")
